So it’s same for all instances
main.seed = 1028
UsingR version 4.0.5 (2021-03-31).
suppressPackageStartupMessages({
library(vegan)
library(tidyverse)
library(stats)
library(phyloseq)
library(mefa)
library(qiime2R)
library(microbiome)
library(fantaxtic)
library(pals)
library(stringr)
library(car)
library(rstatix)
library(ggpubr)
library(corrplot)
library(readxl)
library(psych)
library(sjPlot)
library(RVAideMemoire)
library(purrr)
library(olsrr)
library(ggformula)
library(reshape2)
library(mosaic)
library(micropower)
library(kableExtra)
library(gridExtra) #for scatterplots
library(summarytools) #for chi-square test and tables
library(pairwiseAdonis)
source("../helper_functions/omega_sq_adonis.R") #load function to calculate omega squared for adonis objects
library(Maaslin2)
library(ppcor) #for eta-squared calculations
})
Note: Filepaths not shown
physeq_M2448 <- qza_to_phyloseq(
features=otu_table_path,
tree=tree_path,
taxonomy=taxonomy_path,
metadata=metadata_path
)
#create a phyloseq object containing only 24M samples
physeq_M24 <- subset_samples(physeq_M2448, Timepoint_months=="24")
#read in continuous stress data
M24_stress <-
read_csv(m24_stress_data)
M48_stress <-
read_csv(m48_stress_data)
leq_resp <-
read_csv(leq_resp_file) %>%
dplyr::select(
subjid,
person_filling_leq = person_filling_Y04
)
#bind these datasets together
stress_data <-
M24_stress %>%
bind_rows(M48_stress) %>%
left_join(leq_resp, by = "subjid")
#read in alpha diversity data from qiime2
shannon <-
read_tsv(shannon_file) %>%
dplyr::rename('#SampleID' = ...1)
faith <-
read_tsv(faith_file) %>%
dplyr::rename('#SampleID' = ...1)
evenness <-
read_tsv(evenness_file) %>%
dplyr::rename('#SampleID' = ...1)
richness <-
read_tsv(richness_file) %>%
dplyr::rename('#SampleID' = ...1)
#create dataset with alpha diversity metrics & stress
alpha_diversity_stress <-
shannon %>%
left_join(faith, by = "#SampleID") %>%
left_join(evenness, by = "#SampleID") %>%
left_join(richness, by = "#SampleID") %>%
left_join(stress_data, by = "#SampleID")
#read in child sex data
sex_data <- readxl::read_xlsx(sex_file) %>%
dplyr::rename(subjid=SubjectID)
#read in other covariates
bf_data <- read_csv(bf_file) %>%
full_join(read_csv(delivery_file), by = "subjid") %>%
full_join(read_csv(income_file), by = "subjid") %>%
full_join(read_csv(age_file), by = "subjid")
gest_age <- readxl::read_xlsx(gestage_file) %>%
dplyr::select(
subjid = SubjectID,
GA
)
biotics <- read_csv(biotics_file)
#read in sequencing batch
meta <- read_tsv(metadata_path)
meta <- meta[-c(1), ] #remove #q2:types header
meta <-
meta %>%
dplyr::select(
'#SampleID',
Sequencing_batch
)
#bind covariate data to rest
alpha_diversity_stress <-
alpha_diversity_stress %>%
left_join(sex_data, by = "subjid") %>%
left_join(bf_data, by = "subjid") %>%
left_join(meta, by = "#SampleID") %>%
left_join(biotics, by = "subjid") %>%
left_join(gest_age, by = "subjid")
#read in cbcl
child_beh4 <- read_csv(beh4_file) %>%
dplyr::select(
subjid,
age_y4_cbcl = age_y4,
cbcl_respondent_y4,
cbcl_gi_y4,
contains("tot_")
)
child_beh2 <-
read_csv(beh2_file) %>%
dplyr::select(
subjid,
age_m24_cbcl = age_m24,
cbcl_respondent_m24,
cbcl_gi_m24,
contains("tot_")
)
#bind cbcl data to rest
alpha_diversity_stress <-
alpha_diversity_stress %>%
left_join(child_beh4, by = "subjid") %>%
left_join(child_beh2, by = "subjid")
#create categorical postnatal stress var because LEQ has too few scale points
alpha_diversity_stress <-
alpha_diversity_stress %>%
mutate(
postnatal_stress = case_when(
timepoint == 48 & child_neg_events_sum > 0 ~ 1,
timepoint == 48 & child_neg_events_sum == 0 ~ 0,
timepoint == 24 & leq_b2_events_total_Y04 > 0 ~ 1,
timepoint == 24 & leq_b2_events_total_Y04 == 0 ~ 0
)
) %>%
mutate(postnatal_stress = as.factor(postnatal_stress))
#remove all unused variables
alpha_diversity_stress <- alpha_diversity_stress %>%
dplyr::select(
-contains("neg"),
-contains("leq"),
-(contains("ctq")& !contains("total")),
-maltreated_M54,
-contains("days"),
-c_age_years_stoolproxy_y4,
-person_filling_M54
)
#create dataset of cases that have usable microbiome data and reported on at least 1 timepoint of adversity exposure to be used in analyses (N=450)
alpha_diversity_stress_usable <-
alpha_diversity_stress %>%
filter(timepoint==24 & !(is.na(STAI_prorated_state_pw26) & is.na(ctq_total_score_M54) & is.na(postnatal_stress)))
#create categorical prenatal adversity variable
alpha_diversity_stress_usable <- alpha_diversity_stress_usable %>%
mutate(stai_above_clin_cutoff = ifelse(STAI_prorated_state_pw26 > 40, 1, 0))
#create standardized cbcl scores with mean=100, sd=15 within each sex (scores are named cbcl{subscale}tot_{timepoint}_std) and then remove unstandardized variables
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
group_by(sex_binary) %>%
dplyr::mutate(across(c(contains("tot_"), contains("gi")), .fns = list(std = ~as.vector(scale(.))*15 + 100), .names="{col}_{fn}")) %>%
ungroup() %>%
dplyr::select(
-(contains("tot_") & !contains("std")),
-(contains("gi") & !contains("std"))
)
Note on decision to standardize cbcl raw scores within each sex: ASEBA Y1.5-5 manual recommends using raw scores but standardizing by within your sample by sex (pg 122). Their suggested mean is 100, sd is 15.
#create sum of timepoints with "substantial" adversity exposure (range = 0 (no timempoints with substantial adversity exposure) to 3 (substantial adversity exposure at all timepoints))
alpha_diversity_stress_usable <- alpha_diversity_stress_usable %>%
dplyr::mutate(
sum_advrsty = as.numeric(as.character(maltreated_mod_M54)) + stai_above_clin_cutoff + as.numeric(as.character(postnatal_stress))
)
#determine how many dyads have 0, 1, 2, and 3 timepoints of exposure
alpha_diversity_stress_usable %>% ggplot(aes(x=sum_advrsty, fill=as.factor(sum_advrsty))) +
geom_histogram(binwidth = 1) +
stat_bin(aes(y=..count.., label=ifelse(..count..==0,"",..count..)), geom="text", vjust=-.5) +
scale_fill_discrete("Timepoints with Adversity Exposure") +
xlab("Number of Timepoints with Adverity Exposure") +
ylab("Number of Participants")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#create the collapsed grouping of 3 cumulative adversity levels (because very few dyads had sum_advrsty=3)
alpha_diversity_stress_usable <- alpha_diversity_stress_usable %>%
mutate(
cumulative_adv = case_when(
sum_advrsty == 0 ~ 0,
sum_advrsty == 1 ~ 1,
sum_advrsty == 2 | sum_advrsty == 3 ~ 2
)
)
#convert categorical binary & multicategorical variables to factor type (for analyses that require it later)
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
dplyr::mutate(across(c(cumulative_adv, sex_binary, deliv_mode, postnatal_stress, Sequencing_batch, maltreated_mod_M54, antibio_M24, probiotics_M24), ~as.factor(.x)))
#write alpha diversity analyses dataset to file
write_csv(alpha_diversity_stress_usable, "../../data/analysis_datasets/fran/alpha_div_stress_usable.csv")
#also write these data to form B data folder for submission with GUSTO form B application
write_csv(alpha_diversity_stress_usable, "../../manuscripts/fran_adversity_microbiome/Form_B_submission/data_updated/alpha_diversity_and_metadata.csv")
#get relevant variables
cbcl_covs <- alpha_diversity_stress_usable %>%
dplyr::select(
monthly_income_per_member,
GA,
cbclintprobtot_m24,
cbcltotprobtot_m24,
cbcldevprobtot_m24,
age_m24_cbcl
)
#correlations with cbcl and continuous covs
corPlot(cbcl_covs,
numbers = TRUE,
stars = TRUE,
show.legend = TRUE,
cex.axis = 0.8,
cex = 0.5)
#no associations between monthly income and cbcl outcome vars
#associations with categorical covs
t.test(cbclintprobtot_m24~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcltotprobtot_m24~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcldevprobtot_m24~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
summary(aov(cbclintprobtot_m24~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcltotprobtot_m24~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcldevprobtot_m24~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
No domains differ
#get relevant variables
cbcl_covs_y4 <- alpha_diversity_stress_usable %>%
dplyr::select(
monthly_income_per_member,
GA,
cbclintprobtot_y4,
cbcltotprobtot_y4,
cbclextprobtot_y4,
cbclattprobtot_y4,
cbclsleepprobtot_y4,
cbcldevprobtot_y4,
cbcladhdprobtot_y4,
age_y4_cbcl
)
#correlations with cbcl domains and continuous covariates
corPlot(cbcl_covs_y4,
numbers = TRUE,
stars = TRUE,
show.legend = TRUE,
cex.axis = 0.8,
cex = 0.5)
#no differences
#associations with categorical covs
t.test(cbclintprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcltotprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbclextprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #sig, males higher than females
t.test(cbclattprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #sig, males higher than females
t.test(cbclsleepprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcldevprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcladhdprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
summary(aov(cbclintprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcltotprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbclextprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbclattprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbclsleepprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcldevprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcladhdprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
Externalizing and attention problems at year 4 differ by sex (higher in male children on average) – further validating the decision to standardize socioemotional functioning variables within each sex (performed in data wrangling above).
Test association between sequencing batch and adversity variables, and numbers in each batch
#t-test with sequencing batch and preconception, prenatal adversity
t.test(ctq_total_score_M54~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(STAI_prorated_state_pw26~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
#chi-square test with sequencing batch and postnatal stress, adversity grouping variables
chisq.test(alpha_diversity_stress_usable$Sequencing_batch, alpha_diversity_stress_usable$postnatal_stress) #ns
chisq.test(alpha_diversity_stress_usable$Sequencing_batch, alpha_diversity_stress_usable$maltreated_mod_M54) #ns
chisq.test(alpha_diversity_stress_usable$Sequencing_batch, alpha_diversity_stress_usable$stai_above_clin_cutoff) #ns
#count the number of each group in the sequencing batches
alpha_diversity_stress_usable %>%
filter(!is.na(postnatal_stress)) %>%
group_by(Sequencing_batch, postnatal_stress) %>%
summarise(cnt = n()) %>%
mutate(freq = round(cnt / sum(cnt), 3)) %>%
arrange(desc(freq)) #22% yes in batch1 (of non-nas), 25% in batch2
alpha_diversity_stress_usable %>%
filter(!is.na(maltreated_mod_M54)) %>%
group_by(Sequencing_batch, maltreated_mod_M54) %>%
summarise(cnt = n()) %>%
mutate(freq = round(cnt / sum(cnt), 3)) %>%
arrange(desc(freq)) #42% yes in batch1 (of non-nas), 46% in batch2
alpha_diversity_stress_usable %>%
filter(!is.na(stai_above_clin_cutoff)) %>%
group_by(Sequencing_batch, stai_above_clin_cutoff) %>%
summarise(cnt = n()) %>%
mutate(freq = round(cnt / sum(cnt), 3)) %>%
arrange(desc(freq)) #22% in batch 1 for each group
Conclusion: adversity grouping variable prevalences do not differ by sequencing batch
# get top 25 most abundant taxa
physeq_ttaxa <- get_top_taxa(physeq_obj = physeq_M24,
n = 25,
relative = TRUE,
discard_other = TRUE)
#group by postnatal adversity
physeq_ttaxa_leq <- merge_samples(physeq_ttaxa, "postnatal_stress")
#create 'genus species' variable
tmp_leq <- name_taxa(physeq_ttaxa_leq, species = TRUE)
#barplot based on postnatal adversity group (NAs are removed)
fantaxtic_bar(tmp_leq, color_by = "Species", label_by = "Species") +
xlab("Postnatal Adversity") +
ylab("Relative Abundance") +
scale_fill_manual(values=as.vector(glasbey(32)))
## Level N.color.shades
## 1 Faecalibacterium uncultured bacterium 1
## 2 Clostridium sensu stricto 1 uncultured bacterium 1
## 3 Enterococcus Enterococcus devriesei 1
## 4 Staphylococcus uncultured bacterium 1
## 5 Streptococcus uncultured bacterium 2
## 6 Erysipelatoclostridium uncultured bacterium 2
## 7 Collinsella Collinsella aerofaciens ATCC 25986 1
## 8 Bifidobacterium Bifidobacterium pseudocatenulatum 1
## 9 Escherichia-Shigella uncultured bacterium 1
## 10 Cronobacter uncultured bacterium 1
## 11 Akkermansia uncultured bacterium 1
## 12 Bacteroides uncultured bacterium 1
## 13 Blautia uncultured bacterium 4
## 14 Fusicatenibacter uncultured Firmicutes bacterium 1
## 15 [Eubacterium] hallii group uncultured bacterium 1
## 16 Intestinibacter uncultured bacterium 2
## 17 Anaerostipes uncultured bacterium 1
## 18 Lachnoclostridium uncultured Firmicutes bacterium 1
## 19 Tyzzerella 4 uncultured bacterium 1
## Central.color
## 1 #6495ED
## 2 #EDD264
## 3 #646AED
## 4 #DDED64
## 5 #8A64ED
## 6 #B2ED64
## 7 #B564ED
## 8 #87ED64
## 9 #E064ED
## 10 #64ED6D
## 11 #ED64CF
## 12 #64ED98
## 13 #ED64A3
## 14 #64EDC3
## 15 #ED6478
## 16 #64ECED
## 17 #ED7B64
## 18 #64C0ED
## 19 #EDA664
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
#group by prenatal adversity group (above STAI-S cutoff for clinical levels of anxiety)
physeq_ttaxa_merged_stai <- merge_samples(physeq_ttaxa, "stai_state_above_clin_cutoff_pw26")
tmp_stai <- name_taxa(physeq_ttaxa_merged_stai, species = TRUE)
#barplot based on prenatal adversity (NAs are removed)
fantaxtic_bar(tmp_stai, color_by = "Species", label_by = "Species") +
xlab("Prenatal Adversity") +
ylab("Relative Abundance") +
scale_fill_manual(values=as.vector(glasbey(32)))
## Level N.color.shades
## 1 Faecalibacterium uncultured bacterium 1
## 2 Clostridium sensu stricto 1 uncultured bacterium 1
## 3 Enterococcus Enterococcus devriesei 1
## 4 Staphylococcus uncultured bacterium 1
## 5 Streptococcus uncultured bacterium 2
## 6 Erysipelatoclostridium uncultured bacterium 2
## 7 Collinsella Collinsella aerofaciens ATCC 25986 1
## 8 Bifidobacterium Bifidobacterium pseudocatenulatum 1
## 9 Escherichia-Shigella uncultured bacterium 1
## 10 Cronobacter uncultured bacterium 1
## 11 Akkermansia uncultured bacterium 1
## 12 Bacteroides uncultured bacterium 1
## 13 Blautia uncultured bacterium 4
## 14 Fusicatenibacter uncultured Firmicutes bacterium 1
## 15 [Eubacterium] hallii group uncultured bacterium 1
## 16 Intestinibacter uncultured bacterium 2
## 17 Anaerostipes uncultured bacterium 1
## 18 Lachnoclostridium uncultured Firmicutes bacterium 1
## 19 Tyzzerella 4 uncultured bacterium 1
## Central.color
## 1 #6495ED
## 2 #EDD264
## 3 #646AED
## 4 #DDED64
## 5 #8A64ED
## 6 #B2ED64
## 7 #B564ED
## 8 #87ED64
## 9 #E064ED
## 10 #64ED6D
## 11 #ED64CF
## 12 #64ED98
## 13 #ED64A3
## 14 #64EDC3
## 15 #ED6478
## 16 #64ECED
## 17 #ED7B64
## 18 #64C0ED
## 19 #EDA664
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
#group by preconception adversity group ('moderate to severe' maltreatment in at least 1 category)
physeq_ttaxa_merged_ctq <- merge_samples(physeq_ttaxa, "maltreated_mod_M54")
tmp_ctq <- name_taxa(physeq_ttaxa_merged_ctq, species = TRUE)
#barplot based on preconception adversity
fantaxtic_bar(tmp_ctq, color_by = "Species", label_by = "Species") +
xlab("Preconception Adversity") +
ylab("Relative Abundance") +
scale_fill_manual(values=as.vector(glasbey(32)))
## Level N.color.shades
## 1 Faecalibacterium uncultured bacterium 1
## 2 Clostridium sensu stricto 1 uncultured bacterium 1
## 3 Enterococcus Enterococcus devriesei 1
## 4 Staphylococcus uncultured bacterium 1
## 5 Streptococcus uncultured bacterium 2
## 6 Erysipelatoclostridium uncultured bacterium 2
## 7 Collinsella Collinsella aerofaciens ATCC 25986 1
## 8 Bifidobacterium Bifidobacterium pseudocatenulatum 1
## 9 Escherichia-Shigella uncultured bacterium 1
## 10 Cronobacter uncultured bacterium 1
## 11 Akkermansia uncultured bacterium 1
## 12 Bacteroides uncultured bacterium 1
## 13 Blautia uncultured bacterium 4
## 14 Fusicatenibacter uncultured Firmicutes bacterium 1
## 15 [Eubacterium] hallii group uncultured bacterium 1
## 16 Intestinibacter uncultured bacterium 2
## 17 Anaerostipes uncultured bacterium 1
## 18 Lachnoclostridium uncultured Firmicutes bacterium 1
## 19 Tyzzerella 4 uncultured bacterium 1
## Central.color
## 1 #6495ED
## 2 #EDD264
## 3 #646AED
## 4 #DDED64
## 5 #8A64ED
## 6 #B2ED64
## 7 #B564ED
## 8 #87ED64
## 9 #E064ED
## 10 #64ED6D
## 11 #ED64CF
## 12 #64ED98
## 13 #ED64A3
## 14 #64EDC3
## 15 #ED6478
## 16 #64ECED
## 17 #ED7B64
## 18 #64C0ED
## 19 #EDA664
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
# for LEQ, who was the respondent?
alpha_diversity_stress_usable %>%
group_by(any_bf_months) %>%
summarise(n())
## # A tibble: 6 × 2
## any_bf_months `n()`
## <chr> <int>
## 1 <1M 82
## 2 >12M 104
## 3 1M_to_3M 72
## 4 3M_to_6M 80
## 5 6M_to_12M 100
## 6 <NA> 12
#preconception adversity
alpha_diversity_stress_usable %>%
filter(timepoint==24) %>%
ggplot(aes(ctq_total_score_M54)) +
geom_histogram(binwidth = 2) +
xlab("Preconception Adversity Score")
#prenatal adversity
alpha_diversity_stress_usable %>%
filter(timepoint==24) %>%
ggplot(aes(STAI_prorated_state_pw26)) +
geom_histogram(binwidth = 2) +
geom_vline(aes(xintercept = 40, colour="cutoff")) +
scale_color_manual(name = "Clinical Cutoff", values = c("cutoff" = "red"))
#postnatal adversity
alpha_diversity_stress_usable %>%
filter(timepoint==24 & !is.na(postnatal_stress)) %>%
ggplot(aes(postnatal_stress)) +
geom_bar() +
xlab("Postnatal Adversity")
#Alpha diversity -- shannon index
alpha_diversity_stress %>%
filter(timepoint==24) %>%
ggplot(aes(shannon_entropy)) +
geom_histogram(binwidth = 0.25) +
xlab("Shannon Index")
#alpha diveristy -- evenness
alpha_diversity_stress %>%
filter(timepoint==24) %>%
ggplot(aes(pielou_evenness)) +
geom_histogram(binwidth = 0.05)
#alpha diversity -- faith pd
alpha_diversity_stress_usable%>%
filter(timepoint==24) %>%
ggplot(aes(faith_pd)) +
geom_histogram(binwidth = 0.5)
#alpha diversity -- observed features
alpha_diversity_stress_usable%>%
filter(timepoint==24) %>%
ggplot(aes(observed_features)) +
geom_histogram(binwidth = 2)
#cbcl total problems age 2 years
alpha_diversity_stress_usable %>%
ggplot(aes(cbcltotprobtot_m24_std)) +
geom_histogram(binwidth = 5)
#cbcl sleep problems at age 4 years
alpha_diversity_stress_usable %>%
ggplot(aes(cbclsleepprobtot_y4_std)) +
geom_histogram(binwidth = 5)
#Ns
#how many are of each ethnicity?
alpha_diversity_stress_usable %>%
dplyr::count(mother_ethnicity)
#each sex?
alpha_diversity_stress_usable %>%
dplyr::count(sex_binary) #0 = female, 1 = male
#each delivery mode?
alpha_diversity_stress_usable %>%
dplyr::count(deliv_mode_str)
#in each postnatal adveristy group?
alpha_diversity_stress_usable %>%
dplyr::count(postnatal_stress)
#in each preconception adversity group?
alpha_diversity_stress_usable %>%
dplyr::count(maltreated_mod_M54 == 1)
#in each prenatal adveristy group?
alpha_diversity_stress_usable %>%
dplyr::count(STAI_prorated_state_pw26 > 40)
#each antibiotic use group? (yes/no)
alpha_diversity_stress_usable %>%
dplyr::count(antibio_M24 ==1)
#each probiotic use group? (yes/no)
alpha_diversity_stress_usable %>%
dplyr::count(probiotics_M24 ==1)
#each sequencing batch?
alpha_diversity_stress_usable %>%
dplyr::count(Sequencing_batch =="batch1")
#who is the respondent for LEQ?
alpha_diversity_stress_usable %>%
filter(!is.na(leq_b2_events_total_Y04)) %>%
dplyr::count(person_filling_Y04)
#who is the respondent for CBCL year 2?
alpha_diversity_stress_usable %>%
filter(!is.na(cbcl_respondent_m24)) %>%
dplyr::count(cbcl_respondent_m24)
#who is the respondent for CBCL year 4?
alpha_diversity_stress_usable %>%
filter(!is.na(cbcl_respondent_y4)) %>%
dplyr::count(cbcl_respondent_y4)
#how many for each breastfeeding category?
table(alpha_diversity_stress_usable$only_bf_months)
table(alpha_diversity_stress_usable$any_bf_months)
#mean, sd, range (continuous covariates, adversities, alpha diversity metrics)
favstats(~monthly_income_per_member, data=alpha_diversity_stress_usable)
favstats(~leq_b2_events_total_Y04, data=alpha_diversity_stress_usable)
favstats(~STAI_prorated_state_pw26, data=alpha_diversity_stress_usable)
favstats(~ctq_total_score_M54, data=alpha_diversity_stress_usable)
favstats(~GA, data=alpha_diversity_stress_usable)
favstats(~observed_features, data=alpha_diversity_stress_usable)
favstats(~faith_pd, data=alpha_diversity_stress_usable)
favstats(~shannon_entropy, data=alpha_diversity_stress_usable)
favstats(~pielou_evenness, data=alpha_diversity_stress_usable)
#mean, sd, range (cbcl variables)
favstats(~cbcldevprobtot_m24_std, data=alpha_diversity_stress_usable)
favstats(~cbclintprobtot_m24_std, data=alpha_diversity_stress_usable)
favstats(~cbclsleepprobtscore_m24, data=alpha_diversity_stress_usable)
favstats(~cbcltotprobtot_m24_std, data=alpha_diversity_stress_usable)
favstats(~cbcladhdprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbclattprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbcldevprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbclextprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbclintprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbclsleepprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbcltotprobtot_y4_std, data=alpha_diversity_stress_usable)
#how many complete on year 2 cbcl? (all participants that have 1 subscale, have all of them)
alpha_diversity_stress_usable %>% dplyr::count(!is.na(cbclintprobtot_m24_std)) #e.g., 178 with complete for internalizing problems y2
alpha_diversity_stress_usable %>% dplyr::count(!is.na(cbclintprobtot_y4_std)) #e.g., 300 with complete for internalizing problems y4
#mean, sd, range (age variables)
favstats(~c_age_years_stoolproxy_m24, data=alpha_diversity_stress_usable)
favstats(~age_y4_cbcl, data=alpha_diversity_stress_usable)
favstats(~age_m24_cbcl, data=alpha_diversity_stress_usable)
#how many complete on age variables?
alpha_diversity_stress_usable %>% dplyr::count(!is.na(age_m24_cbcl)) #only 145 with complete data on age at Y2
alpha_diversity_stress_usable %>% dplyr::count(!is.na(age_y4_cbcl)) #only 299 with complete data on age at Y4
#how many with complete data on all adversities?
alpha_diversity_stress_usable %>% dplyr::count(!is.na(postnatal_stress) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26)) #N=205
#create df with just continuous variables for correlation matrix
alpha_div_stress_cor <-
alpha_diversity_stress_usable %>%
dplyr::select(
shannon_entropy,
faith_pd,
pielou_evenness,
observed_features,
STAI_prorated_state_pw26,
ctq_total_score_M54,
monthly_income_per_member,
GA,
c_age_years_stoolproxy_m24
)
#generate correlations
cor(alpha_div_stress_cor, use="pairwise.complete.obs")
## shannon_entropy faith_pd pielou_evenness
## shannon_entropy 1.0000000000 0.4183426493 0.971758951
## faith_pd 0.4183426493 1.0000000000 0.315802288
## pielou_evenness 0.9717589514 0.3158022878 1.000000000
## observed_features 0.7183060298 0.5687247721 0.537840793
## STAI_prorated_state_pw26 0.0047040890 -0.0510138731 0.003943694
## ctq_total_score_M54 0.0004104005 -0.0474815685 0.001324786
## monthly_income_per_member -0.0207373915 -0.0651941518 0.013721329
## GA 0.0358812113 0.0003925481 0.056962115
## c_age_years_stoolproxy_m24 0.0566809881 0.0162011701 0.049665400
## observed_features STAI_prorated_state_pw26
## shannon_entropy 0.718306030 0.004704089
## faith_pd 0.568724772 -0.051013873
## pielou_evenness 0.537840793 0.003943694
## observed_features 1.000000000 0.002426840
## STAI_prorated_state_pw26 0.002426840 1.000000000
## ctq_total_score_M54 0.006883225 0.176639006
## monthly_income_per_member -0.118933139 -0.143561284
## GA -0.049055254 -0.045515010
## c_age_years_stoolproxy_m24 0.058456510 0.076372335
## ctq_total_score_M54 monthly_income_per_member
## shannon_entropy 0.0004104005 -0.02073739
## faith_pd -0.0474815685 -0.06519415
## pielou_evenness 0.0013247861 0.01372133
## observed_features 0.0068832251 -0.11893314
## STAI_prorated_state_pw26 0.1766390064 -0.14356128
## ctq_total_score_M54 1.0000000000 0.06496342
## monthly_income_per_member 0.0649634151 1.00000000
## GA 0.0116413647 0.13389398
## c_age_years_stoolproxy_m24 0.0743657585 0.14566203
## GA c_age_years_stoolproxy_m24
## shannon_entropy 0.0358812113 0.05668099
## faith_pd 0.0003925481 0.01620117
## pielou_evenness 0.0569621151 0.04966540
## observed_features -0.0490552535 0.05845651
## STAI_prorated_state_pw26 -0.0455150099 0.07637233
## ctq_total_score_M54 0.0116413647 0.07436576
## monthly_income_per_member 0.1338939839 0.14566203
## GA 1.0000000000 -0.15288394
## c_age_years_stoolproxy_m24 -0.1528839353 1.00000000
cor.test(alpha_diversity_stress_usable$observed_features, alpha_diversity_stress_usable$monthly_income_per_member)
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.4372, df = 414, p-value = 0.01522
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.21264715 -0.02305111
## sample estimates:
## cor
## -0.1189331
#visualize correlations
corPlot(alpha_div_stress_cor,
numbers=TRUE,
main = "",
stars=TRUE,
labels = c("Shannon index", "Faith PD", "Pielou's Evennes", "Observed Features", "Prenatal Adversity", "Preconception Adversity", "Income", "Gestational Age", "Child Age at Stool Collection"),
show.legend=TRUE,
diag=FALSE,
upper=FALSE,
scale = TRUE,
xlas=2,
# gr=gr,
cex.axis = 0.8,
cex = 0.5)
Significant continuous covariates: monthly income
Potential covariates: sex_binary, delivery mode, breastfeeding vars, ethnicity, antibiotics, probiotics, sequencing batch
#alpha diveristy by sex
t.test(shannon_entropy~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(faith_pd~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #females higher
t.test(pielou_evenness~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(observed_features~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #females higher
#differences in alpha div by delivery mode
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
mutate(deliv_mode_str = as.factor(deliv_mode_str)) #convert categorical var to factor
t.test(shannon_entropy~deliv_mode_str, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(faith_pd~deliv_mode_str, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #vaginally born higher
t.test(pielou_evenness~deliv_mode_str, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(observed_features~deliv_mode_str, data = alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
#difference in alpha div by duration of exclusive breastfeeding (one-way ANOVA)
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
mutate(only_bf_months = as.factor(only_bf_months)) #convert categorical var to factor
summary(aov(shannon_entropy~only_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(faith_pd~only_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(pielou_evenness~only_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(observed_features~only_bf_months, data=alpha_diversity_stress_usable)) #ns
#difference in alpha diversity by duration of any breastfeeding (one-way ANOVA)
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
mutate(any_bf_months = as.factor(any_bf_months)) #convert categorical var to factor
summary(aov(shannon_entropy~any_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(faith_pd~any_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(pielou_evenness~any_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(observed_features~any_bf_months, data=alpha_diversity_stress_usable)) #ns
#difference in alpha diversity by ethnicity (one-way ANOVA)
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
mutate(mother_ethnicity = as.factor(mother_ethnicity)) #convert categorical var to factor
summary(aov(shannon_entropy~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(faith_pd~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(pielou_evenness~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(observed_features~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
#difference in alpha diversity by antibiotics
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
mutate(antibio_M24 = as.factor(antibio_M24))
t.test(shannon_entropy~antibio_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(faith_pd~antibio_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(pielou_evenness~antibio_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(observed_features~antibio_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
#difference in alpha diversity by probiotics
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
mutate(probiotics_M24 = as.factor(probiotics_M24))
t.test(shannon_entropy~probiotics_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(faith_pd~probiotics_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(pielou_evenness~probiotics_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(observed_features~probiotics_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
#difference in alpha diversity by sequencing batch
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
mutate(Sequencing_batch = as.factor(Sequencing_batch))
t.test(shannon_entropy~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #sig (batch 1 higher)
t.test(faith_pd~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(pielou_evenness~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #sig (batch 1 higher)
t.test(observed_features~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
Significant categorical covariates: delivery mode, sex, sequencing batch
Non-binary categorical covariates: ethnicity (3 levels), duration of
exclusive breastfeeding (4 levels)
Dummy coding means each group is compared to the reference group
#dummy code ethnicity
alpha_diversity_stress_usable <- base::transform(alpha_diversity_stress_usable,
eth_f1 = (mother_ethnicity == "malay"),
eth_f2 = (mother_ethnicity == "indian"))
#factor 1 = Malay relative to Chinese
#factor 2 = Indian relative to Chinese
#Chinese is reference group
#dummy code duration of exclusive breastfeeding
alpha_diversity_stress_usable <- base::transform(alpha_diversity_stress_usable,
bf_f1 = (only_bf_months == "1M_to_3M"),
bf_f2 = (only_bf_months == "3M_to_6M"),
bf_f3 = (only_bf_months == "6M_to_12M"))
#less than 1 month is reference group
#factor 1 = 1 to 3 months relative to 1 month
#factor 2 = 3 to 6 months relative to 1 month
#factor 3 = 6 to 12 months relative to 1 month
#dummy code cumulative adversity in comparison to no exposure group
alpha_diversity_stress_usable <- base::transform(alpha_diversity_stress_usable,
cumulative_0v1 = (cumulative_adv == "1"),
cumulative_0v2 = (cumulative_adv == "2"))
#0 (no adversity exposure) is the reference group
#factor 1 = no exposure relative to 1 timepoint
#factor 2 = no exposure relative to 2+ timepoints
Note: We do not need to check normality of residuals. Since the number of observations per variable is > 10 in all models, violations of normality will not impact results Schmidt & Finan, 2018
#define base models -- no covariates
base_mod_precon <- lm(shannon_entropy~ctq_total_score_M54, data = alpha_diversity_stress_usable, na.action = na.exclude) #na.exclude allows for residuals to have NAs added so that they can be joined to df which includes more than complete cases
alpha_diversity_stress_usable$pred_base_mod_precon <- predict(base_mod_precon)
alpha_diversity_stress_usable$resid_base_mod_precon <- residuals(base_mod_precon)
base_mod_prenat <- lm(shannon_entropy~STAI_prorated_state_pw26, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_base_mod_prenat <- predict(base_mod_prenat)
alpha_diversity_stress_usable$resid_base_mod_prenat <- resid(base_mod_prenat)
base_mod_postnat <- lm(shannon_entropy~postnatal_stress, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_base_mod_postnat <- predict(base_mod_postnat)
alpha_diversity_stress_usable$resid_base_mod_postnat <- resid(base_mod_postnat)
#check homoskadisticity of residuals -- base models
gf_point(resid_base_mod_precon~pred_base_mod_precon, data = alpha_diversity_stress_usable)
gf_point(resid_base_mod_prenat~pred_base_mod_prenat, data = alpha_diversity_stress_usable)
gf_point(resid_base_mod_postnat~pred_base_mod_postnat, data = alpha_diversity_stress_usable)
#outliers
ols_plot_dfbetas(base_mod_precon)
ols_plot_dfbetas(base_mod_prenat)
ols_plot_dfbetas(base_mod_postnat)
#define models with covariates (delivery mode, child sex, sequencing batch, income)
cov1_mod_precon <-lm(shannon_entropy~ctq_total_score_M54+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_cov1_mod_precon <- predict(cov1_mod_precon)
alpha_diversity_stress_usable$resid_cov1_mod_precon <- residuals(cov1_mod_precon)
cov1_mod_prenat <-lm(shannon_entropy~STAI_prorated_state_pw26+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_cov1_mod_prenat <- predict(cov1_mod_prenat)
alpha_diversity_stress_usable$resid_cov1_mod_prenat <- resid(cov1_mod_prenat)
cov1_mod_postnat <- lm(shannon_entropy~postnatal_stress+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_cov1_mod_postnat <- predict(cov1_mod_postnat)
alpha_diversity_stress_usable$resid_cov1_mod_postnat <- resid(cov1_mod_postnat)
#check homoskedasticity of residuals -- cov1 models
gf_point(resid_cov1_mod_precon~pred_cov1_mod_precon, data = alpha_diversity_stress_usable)
gf_point(resid_cov1_mod_prenat~pred_cov1_mod_prenat, data = alpha_diversity_stress_usable)
gf_point(resid_cov1_mod_postnat~pred_cov1_mod_postnat, data = alpha_diversity_stress_usable)
#check linearity with respect to each predictor
gf_point(shannon_entropy~ctq_total_score_M54, data=alpha_diversity_stress_usable)
gf_point(shannon_entropy~STAI_prorated_state_pw26, data=alpha_diversity_stress_usable)
gf_violin(shannon_entropy~postnatal_stress, data=na.omit(alpha_diversity_stress_usable))
Conclusions: homoskadisticity of residuals is violated – will perform regression with HC3 standard errors
#define models
cov1_mod_mult <- lm(shannon_entropy~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
base_mod_mult <- lm(shannon_entropy~ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_mult, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE))
| shannon entropy | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 3.35 | 0.25 | 0.18 | 0.14 | 2.86 – 3.85 | -0.10 – 0.45 | 13.44 | <0.001 |
| sex binary [1] | 0.05 | 0.09 | 0.08 | 0.15 | -0.13 – 0.23 | -0.21 – 0.37 | 0.53 | 0.595 |
| deliv mode [1] | -0.13 | 0.10 | -0.20 | 0.17 | -0.33 – 0.08 | -0.53 – 0.13 | -1.22 | 0.225 |
| monthly income per member | -0.00 | 0.00 | -0.02 | 0.07 | -0.00 – 0.00 | -0.16 – 0.13 | -0.23 | 0.816 |
| Sequencing batch [batch2] | -0.19 | 0.11 | -0.31 | 0.18 | -0.41 – 0.03 | -0.65 – 0.04 | -1.74 | 0.083 |
| ctq total score M54 | -0.00 | 0.01 | -0.06 | 0.10 | -0.02 – 0.01 | -0.26 – 0.14 | -0.59 | 0.555 |
| STAI prorated state pw26 | 0.01 | 0.01 | 0.09 | 0.07 | -0.00 – 0.02 | -0.05 – 0.23 | 1.27 | 0.207 |
| postnatal stress [1] | -0.15 | 0.10 | -0.24 | 0.16 | -0.35 – 0.05 | -0.55 – 0.08 | -1.48 | 0.142 |
| Observations | 186 | |||||||
| R2 / R2 adjusted | 0.053 / 0.016 | |||||||
#knitr::knit_print will print the results (which is automatically generated as a .html, and not printed to console)
knitr::knit_print(tab_model(base_mod_mult, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE))
| shannon entropy | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 3.35 | 0.19 | 0.06 | 0.08 | 2.98 – 3.73 | -0.10 – 0.23 | 17.66 | <0.001 |
| ctq total score M54 | -0.01 | 0.01 | -0.10 | 0.08 | -0.02 – 0.00 | -0.27 – 0.06 | -1.22 | 0.223 |
| STAI prorated state pw26 | 0.01 | 0.00 | 0.08 | 0.07 | -0.00 – 0.01 | -0.05 – 0.21 | 1.20 | 0.230 |
| postnatal stress [1] | -0.15 | 0.09 | -0.24 | 0.15 | -0.33 – 0.03 | -0.53 – 0.06 | -1.60 | 0.111 |
| Observations | 205 | |||||||
| R2 / R2 adjusted | 0.026 / 0.011 | |||||||
#calculate effect sizes (eta squared/change in r squared) -- covariates
alpha_div_all_cov_complete <- #create complete cases dataset for this analysis
alpha_diversity_stress_usable %>%
filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress) & !is.na(deliv_mode) & !is.na(sex_binary) &!is.na(monthly_income_per_member) &!is.na(Sequencing_batch))
eta_shannon_covs <- with(alpha_div_all_cov_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, sex_binary, deliv_mode, monthly_income_per_member, Sequencing_batch, shannon_entropy))$estimate^2)
kable(eta_shannon_covs[,ncol(eta_shannon_covs)], col.names="R^2 of each variable on shannon index") %>% kable_styling() #output to console the column that has effect sizes we are interested in (shannon entropy as the outcome)
| R^2 of each variable on shannon index | |
|---|---|
| ctq_total_score_M54 | 0.0033158 |
| STAI_prorated_state_pw26 | 0.0077526 |
| postnatal_stress | 0.0107110 |
| sex_binary | 0.0015082 |
| deliv_mode | 0.0087480 |
| monthly_income_per_member | 0.0002920 |
| Sequencing_batch | 0.0191355 |
| shannon_entropy | 1.0000000 |
#calculate effect sizes (eta squared/change in r squared) -- no covariates
alpha_div_all_ncov_complete <-
alpha_diversity_stress_usable %>%
filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress))
eta_shannon_ncovs <- with(alpha_div_all_ncov_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, shannon_entropy))$estimate^2)
kable(eta_shannon_ncovs[,ncol(eta_shannon_ncovs)], col.names="R^2 of each variable on shannon index") %>% kable_styling() #kable functions give the output nicer formatting
| R^2 of each variable on shannon index | |
|---|---|
| ctq_total_score_M54 | 0.0101649 |
| STAI_prorated_state_pw26 | 0.0062660 |
| postnatal_stress | 0.0113086 |
| shannon_entropy | 1.0000000 |
#FAITH
#define models
cov1_mod_mult_faith <- lm(faith_pd~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
base_mod_mult_faith <- lm(faith_pd~ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_mult_faith, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/faith_covs.doc"))
| faith pd | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 7.12 | 0.74 | 0.16 | 0.14 | 5.67 – 8.58 | -0.11 – 0.42 | 9.68 | <0.001 |
| sex binary [1] | -0.27 | 0.29 | -0.15 | 0.15 | -0.83 – 0.30 | -0.45 – 0.16 | -0.94 | 0.349 |
| deliv mode [1] | -0.15 | 0.29 | -0.08 | 0.16 | -0.73 – 0.42 | -0.39 – 0.23 | -0.53 | 0.595 |
| monthly income per member | -0.00 | 0.00 | -0.04 | 0.07 | -0.00 – 0.00 | -0.17 – 0.10 | -0.53 | 0.596 |
| Sequencing batch [batch2] | 0.10 | 0.31 | 0.05 | 0.17 | -0.52 – 0.72 | -0.28 – 0.39 | 0.32 | 0.748 |
| ctq total score M54 | -0.01 | 0.01 | -0.08 | 0.07 | -0.04 – 0.01 | -0.22 – 0.07 | -1.06 | 0.292 |
| STAI prorated state pw26 | -0.01 | 0.01 | -0.05 | 0.07 | -0.04 – 0.02 | -0.19 – 0.08 | -0.76 | 0.446 |
| postnatal stress [1] | -0.55 | 0.27 | -0.30 | 0.15 | -1.08 – -0.02 | -0.58 – -0.01 | -2.05 | 0.041 |
| Observations | 186 | |||||||
| R2 / R2 adjusted | 0.036 / -0.002 | |||||||
knitr::knit_print(tab_model(base_mod_mult_faith, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/faith_nocovs.doc"))
| faith pd | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 6.80 | 0.57 | 0.08 | 0.09 | 5.69 – 7.92 | -0.09 – 0.25 | 12.02 | <0.001 |
| ctq total score M54 | -0.01 | 0.01 | -0.09 | 0.06 | -0.03 – 0.01 | -0.20 – 0.03 | -1.44 | 0.150 |
| STAI prorated state pw26 | -0.01 | 0.01 | -0.06 | 0.06 | -0.04 – 0.01 | -0.18 – 0.07 | -0.88 | 0.382 |
| postnatal stress [1] | -0.52 | 0.24 | -0.29 | 0.14 | -1.00 – -0.05 | -0.56 – -0.03 | -2.16 | 0.032 |
| Observations | 205 | |||||||
| R2 / R2 adjusted | 0.030 / 0.015 | |||||||
#find mean of faith pd by postnatal stress to see direction of effect
alpha_diversity_stress_usable %>% group_by(postnatal_stress) %>% summarise_at(vars(faith_pd), list(name = mean))
## # A tibble: 3 × 2
## postnatal_stress name
## <fct> <dbl>
## 1 0 5.79
## 2 1 5.38
## 3 <NA> 5.78
#calculate effect sizes (eta squared/change in r squared) -- covariates
alpha_div_faith_all_cov_complete <-
alpha_diversity_stress_usable %>%
filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress) & !is.na(deliv_mode) & !is.na(sex_binary) &!is.na(monthly_income_per_member)&!is.na(Sequencing_batch))
eta_faith_covs <- with(alpha_div_faith_all_cov_complete, spcor(cbind(deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, faith_pd))$estimate^2)
kable(eta_faith_covs[,ncol(eta_faith_covs)], col.names="R^2 of each variable on Faith's PD") %>% kable_styling()
| R^2 of each variable on Faith’s PD | |
|---|---|
| deliv_mode | 0.0014699 |
| sex_binary | 0.0051425 |
| monthly_income_per_member | 0.0012725 |
| Sequencing_batch | 0.0006136 |
| ctq_total_score_M54 | 0.0050787 |
| STAI_prorated_state_pw26 | 0.0025229 |
| postnatal_stress | 0.0166291 |
| faith_pd | 1.0000000 |
#calculate effect sizes (eta squared/change in r squared) -- no covariates
alpha_div_faith_all_base_complete <-
alpha_diversity_stress_usable %>%
filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress))
eta_faith_ncovs <- with(alpha_div_faith_all_base_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, faith_pd))$estimate^2)
kable(eta_faith_ncovs[,ncol(eta_faith_ncovs)], col.names="R^2 of each variable on Faith's PD") %>% kable_styling()
| R^2 of each variable on Faith’s PD | |
|---|---|
| ctq_total_score_M54 | 0.0068937 |
| STAI_prorated_state_pw26 | 0.0028924 |
| postnatal_stress | 0.0169593 |
| faith_pd | 1.0000000 |
#OBSERVED FEATURES
#define models
cov1_mod_mult_feat <- lm(observed_features~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
base_mod_mult_feat <- lm(observed_features~ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_mult_feat, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/observed_covs.doc"))
| observed features | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 86.96 | 7.28 | 0.27 | 0.14 | 72.59 – 101.34 | -0.01 – 0.56 | 11.94 | <0.001 |
| sex binary [1] | -5.60 | 2.66 | -0.30 | 0.14 | -10.84 – -0.36 | -0.58 – -0.02 | -2.11 | 0.036 |
| deliv mode [1] | -2.49 | 3.06 | -0.13 | 0.16 | -8.52 – 3.54 | -0.46 – 0.19 | -0.82 | 0.415 |
| monthly income per member | -0.00 | 0.00 | -0.14 | 0.08 | -0.01 – 0.00 | -0.29 – 0.01 | -1.88 | 0.062 |
| Sequencing batch [batch2] | -1.47 | 3.08 | -0.08 | 0.17 | -7.55 – 4.61 | -0.41 – 0.25 | -0.48 | 0.634 |
| ctq total score M54 | 0.12 | 0.17 | 0.06 | 0.08 | -0.21 – 0.45 | -0.11 – 0.23 | 0.72 | 0.473 |
| STAI prorated state pw26 | -0.04 | 0.15 | -0.02 | 0.07 | -0.35 – 0.26 | -0.17 – 0.12 | -0.29 | 0.770 |
| postnatal stress [1] | -4.71 | 3.15 | -0.25 | 0.17 | -10.93 – 1.51 | -0.59 – 0.08 | -1.49 | 0.137 |
| Observations | 186 | |||||||
| R2 / R2 adjusted | 0.065 / 0.028 | |||||||
knitr::knit_print(tab_model(base_mod_mult_feat, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/observed_nocovs.doc"))
| observed features | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 82.35 | 5.68 | 0.05 | 0.08 | 71.16 – 93.54 | -0.11 – 0.21 | 14.51 | <0.001 |
| ctq total score M54 | -0.00 | 0.12 | -0.00 | 0.07 | -0.24 – 0.23 | -0.14 – 0.14 | -0.03 | 0.977 |
| STAI prorated state pw26 | -0.05 | 0.15 | -0.02 | 0.07 | -0.33 – 0.24 | -0.16 – 0.12 | -0.31 | 0.756 |
| postnatal stress [1] | -3.46 | 2.91 | -0.19 | 0.16 | -9.19 – 2.27 | -0.50 – 0.12 | -1.19 | 0.235 |
| Observations | 205 | |||||||
| R2 / R2 adjusted | 0.008 / -0.007 | |||||||
#calculate effect sizes (eta squared/change in r squared) -- covariates
alpha_div_obs_all_base_complete <-
alpha_diversity_stress_usable %>%
filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress))
eta_features_covs <- with(alpha_div_obs_all_base_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, observed_features))$estimate^2)
kable(eta_features_covs[,ncol(eta_features_covs)], col.names="R^2 of each variable on Observed Features") %>% kable_styling()
| R^2 of each variable on Observed Features | |
|---|---|
| ctq_total_score_M54 | 0.0000037 |
| STAI_prorated_state_pw26 | 0.0004633 |
| postnatal_stress | 0.0070236 |
| observed_features | 1.0000000 |
#calculate effect sizes (eta squared/change in r squared) -- no covariates
alpha_div_obs_all_cov_complete <-
alpha_diversity_stress_usable %>%
filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress) & !is.na(deliv_mode) & !is.na(sex_binary) &!is.na(monthly_income_per_member)&!is.na(Sequencing_batch))
eta_features_ncovs <- with(alpha_div_obs_all_cov_complete, spcor(cbind(deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, observed_features))$estimate^2)
kable(eta_features_ncovs[,ncol(eta_features_ncovs)], col.names="R^2 of each variable on Observed Features") %>% kable_styling()
| R^2 of each variable on Observed Features | |
|---|---|
| deliv_mode | 0.0039110 |
| sex_binary | 0.0224025 |
| monthly_income_per_member | 0.0191220 |
| Sequencing_batch | 0.0013219 |
| ctq_total_score_M54 | 0.0033355 |
| STAI_prorated_state_pw26 | 0.0004448 |
| postnatal_stress | 0.0124610 |
| observed_features | 1.0000000 |
#EVENNESS
#define models
cov1_mod_mult_even <- lm(pielou_evenness~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
base_mod_mult_even <- lm(pielou_evenness~ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_mult_even, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/evenness_covs.doc"))
| pielou evenness | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 0.52 | 0.03 | 0.12 | 0.13 | 0.46 – 0.59 | -0.14 – 0.39 | 15.76 | <0.001 |
| sex binary [1] | 0.02 | 0.01 | 0.18 | 0.14 | -0.01 – 0.04 | -0.10 – 0.46 | 1.25 | 0.214 |
| deliv mode [1] | -0.02 | 0.01 | -0.20 | 0.17 | -0.04 – 0.01 | -0.52 – 0.13 | -1.20 | 0.231 |
| monthly income per member | 0.00 | 0.00 | 0.02 | 0.07 | -0.00 – 0.00 | -0.12 – 0.17 | 0.33 | 0.739 |
| Sequencing batch [batch2] | -0.03 | 0.01 | -0.35 | 0.18 | -0.06 – 0.00 | -0.69 – 0.00 | -1.96 | 0.052 |
| ctq total score M54 | -0.00 | 0.00 | -0.09 | 0.11 | -0.00 – 0.00 | -0.31 – 0.12 | -0.88 | 0.381 |
| STAI prorated state pw26 | 0.00 | 0.00 | 0.11 | 0.07 | -0.00 – 0.00 | -0.03 – 0.25 | 1.55 | 0.124 |
| postnatal stress [1] | -0.02 | 0.01 | -0.18 | 0.16 | -0.04 – 0.01 | -0.50 – 0.14 | -1.10 | 0.271 |
| Observations | 186 | |||||||
| R2 / R2 adjusted | 0.067 / 0.030 | |||||||
knitr::knit_print(tab_model(base_mod_mult_even, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/evenness_nocovs.doc"))
| pielou evenness | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 0.53 | 0.03 | 0.05 | 0.08 | 0.48 – 0.58 | -0.11 – 0.22 | 20.57 | <0.001 |
| ctq total score M54 | -0.00 | 0.00 | -0.12 | 0.09 | -0.00 – 0.00 | -0.30 – 0.05 | -1.39 | 0.167 |
| STAI prorated state pw26 | 0.00 | 0.00 | 0.10 | 0.07 | -0.00 – 0.00 | -0.03 – 0.23 | 1.48 | 0.141 |
| postnatal stress [1] | -0.02 | 0.01 | -0.20 | 0.15 | -0.04 – 0.01 | -0.50 – 0.10 | -1.33 | 0.187 |
| Observations | 205 | |||||||
| R2 / R2 adjusted | 0.029 / 0.014 | |||||||
#calculate effect sizes (eta squared/change in r squared) -- no covariates
eta_even_ncovs <- with(alpha_div_obs_all_base_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, pielou_evenness))$estimate^2)
kable(eta_even_ncovs[,ncol(eta_even_ncovs)], col.names="R^2 of each variable on Pielou Evenness") %>% kable_styling()
| R^2 of each variable on Pielou Evenness | |
|---|---|
| ctq_total_score_M54 | 0.0144675 |
| STAI_prorated_state_pw26 | 0.0094482 |
| postnatal_stress | 0.0080909 |
| pielou_evenness | 1.0000000 |
#calculate effect sizes (eta squared/change in r squared) -- covariates
eta_even_covs <- with(alpha_div_obs_all_cov_complete, spcor(cbind(deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, pielou_evenness))$estimate^2)
kable(eta_even_covs[,ncol(eta_even_covs)], col.names="R^2 of each variable on Pielou Evenness") %>% kable_styling()
| R^2 of each variable on Pielou Evenness | |
|---|---|
| deliv_mode | 0.0085315 |
| sex_binary | 0.0081127 |
| monthly_income_per_member | 0.0005662 |
| Sequencing_batch | 0.0247096 |
| ctq_total_score_M54 | 0.0081330 |
| STAI_prorated_state_pw26 | 0.0117345 |
| postnatal_stress | 0.0062398 |
| pielou_evenness | 1.0000000 |
#SHANNON INDEX
y <- favstats(shannon_entropy~cumulative_adv, data=alpha_diversity_stress_usable) #get group sizes
#define models
base_mod_cumulative0v2_sum <- lm(shannon_entropy~cumulative_0v1 + cumulative_0v2, data = alpha_diversity_stress_usable)
cov1_mod_cumulative0v2_sum <- lm(shannon_entropy~cumulative_0v1 + cumulative_0v2+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable)
#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_cumulative0v2_sum, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/shannon_cumulative_covs.doc"))
| shannon entropy | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 3.41 | 0.13 | 0.15 | 0.17 | 3.16 – 3.67 | -0.18 – 0.48 | 25.94 | <0.001 |
| cumulative 0v1TRUE | 0.00 | 0.11 | 0.00 | 0.17 | -0.21 – 0.22 | -0.33 – 0.34 | 0.02 | 0.984 |
| cumulative 0v2TRUE | -0.12 | 0.13 | -0.19 | 0.21 | -0.38 – 0.14 | -0.60 – 0.22 | -0.91 | 0.364 |
| deliv mode [1] | -0.14 | 0.10 | -0.22 | 0.17 | -0.34 – 0.07 | -0.55 – 0.11 | -1.32 | 0.188 |
| sex binary [1] | 0.07 | 0.09 | 0.10 | 0.15 | -0.12 – 0.25 | -0.19 – 0.40 | 0.70 | 0.487 |
| monthly income per member | -0.00 | 0.00 | -0.04 | 0.07 | -0.00 – 0.00 | -0.18 – 0.11 | -0.47 | 0.635 |
| Sequencing batch [batch2] | -0.20 | 0.12 | -0.31 | 0.18 | -0.43 – 0.03 | -0.68 – 0.05 | -1.68 | 0.095 |
| Observations | 186 | |||||||
| R2 / R2 adjusted | 0.039 / 0.007 | |||||||
knitr::knit_print(tab_model(base_mod_cumulative0v2_sum, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/shannon_cumulative_nocovs.doc"))
| shannon entropy | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 3.32 | 0.07 | 0.07 | 0.11 | 3.19 – 3.45 | -0.14 – 0.28 | 50.23 | <0.001 |
| cumulative 0v1TRUE | -0.04 | 0.10 | -0.06 | 0.16 | -0.23 – 0.16 | -0.37 – 0.25 | -0.37 | 0.709 |
| cumulative 0v2TRUE | -0.14 | 0.12 | -0.23 | 0.19 | -0.37 – 0.09 | -0.59 – 0.14 | -1.21 | 0.226 |
| Observations | 205 | |||||||
| R2 / R2 adjusted | 0.007 / -0.002 | |||||||
#calculate effect sizes (eta squared/change in r squared) -- covariates
alpha_div_cumulative_complete <-
alpha_diversity_stress_usable %>%
filter(!is.na(cumulative_adv) & !is.na(deliv_mode) & !is.na(sex_binary) &!is.na(monthly_income_per_member) & !is.na(Sequencing_batch)) #first, create dataset of complete cases (required for eta squared calculation)
eta_cumul_shannon_covs <- with(alpha_div_cumulative_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, shannon_entropy))$estimate^2)
kable(eta_cumul_shannon_covs[,ncol(eta_cumul_shannon_covs)], col.names="R^2 of each variable on Shannon Index") %>% kable_styling()
| R^2 of each variable on Shannon Index | |
|---|---|
| cumulative_0v1 | 0.0000019 |
| cumulative_0v2 | 0.0040735 |
| deliv_mode | 0.0100456 |
| sex_binary | 0.0026858 |
| monthly_income_per_member | 0.0012375 |
| Sequencing_batch | 0.0191179 |
| shannon_entropy | 1.0000000 |
#calculate effect sizes (eta squared/ change in r squared) -- no covariates
alpha_div_cumulative_nocovs_complete <-
alpha_diversity_stress_usable %>%
filter(!is.na(cumulative_adv))
eta_cumul_shannon_ncovs <- with(alpha_div_cumulative_nocovs_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, shannon_entropy))$estimate^2)
kable(eta_cumul_shannon_ncovs[,ncol(eta_cumul_shannon_ncovs)], col.names="R^2 of each variable on Shannon Index") %>% kable_styling()
| R^2 of each variable on Shannon Index | |
|---|---|
| cumulative_0v1 | 0.0005724 |
| cumulative_0v2 | 0.0060810 |
| shannon_entropy | 1.0000000 |
#FAITH PD
#define models
base_mod_cumulative0v2_faith <- lm(faith_pd~cumulative_0v1 + cumulative_0v2, data = alpha_diversity_stress_usable)
cov1_mod_cumulative0v2_faith <- lm(faith_pd~cumulative_0v1 + cumulative_0v2+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable)
#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_cumulative0v2_faith, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/faith_cumulative_covs.doc"))
| faith pd | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 6.53 | 0.47 | 0.29 | 0.20 | 5.60 – 7.46 | -0.10 – 0.68 | 13.91 | <0.001 |
| cumulative 0v1TRUE | -0.59 | 0.35 | -0.32 | 0.19 | -1.29 – 0.10 | -0.69 – 0.06 | -1.68 | 0.095 |
| cumulative 0v2TRUE | -0.68 | 0.37 | -0.37 | 0.20 | -1.40 – 0.05 | -0.76 – 0.03 | -1.84 | 0.067 |
| deliv mode [1] | -0.14 | 0.29 | -0.08 | 0.16 | -0.72 – 0.43 | -0.39 – 0.23 | -0.50 | 0.620 |
| sex binary [1] | -0.28 | 0.28 | -0.15 | 0.15 | -0.83 – 0.27 | -0.45 – 0.15 | -1.00 | 0.321 |
| monthly income per member | -0.00 | 0.00 | -0.05 | 0.07 | -0.00 – 0.00 | -0.19 – 0.09 | -0.74 | 0.463 |
| Sequencing batch [batch2] | 0.04 | 0.32 | 0.02 | 0.17 | -0.60 – 0.67 | -0.32 – 0.36 | 0.12 | 0.905 |
| Observations | 186 | |||||||
| R2 / R2 adjusted | 0.035 / 0.003 | |||||||
knitr::knit_print(tab_model(base_mod_cumulative0v2_faith, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/faith_cumulative_nocovs.doc"))
| faith pd | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 6.13 | 0.25 | 0.20 | 0.14 | 5.65 – 6.62 | -0.07 – 0.47 | 24.85 | <0.001 |
| cumulative 0v1TRUE | -0.55 | 0.29 | -0.31 | 0.16 | -1.12 – 0.03 | -0.63 – 0.02 | -1.86 | 0.065 |
| cumulative 0v2TRUE | -0.67 | 0.32 | -0.38 | 0.18 | -1.30 – -0.04 | -0.73 – -0.02 | -2.09 | 0.038 |
| Observations | 205 | |||||||
| R2 / R2 adjusted | 0.027 / 0.018 | |||||||
#calculate effect sizes (eta squared/change in r squared) -- covariates
eta_cumul_faith_covs <- with(alpha_div_cumulative_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, faith_pd))$estimate^2)
kable(eta_cumul_faith_covs[,ncol(eta_cumul_faith_covs)], col.names="R^2 of each variable on Faith's PD") %>% kable_styling()
| R^2 of each variable on Faith’s PD | |
|---|---|
| cumulative_0v1 | 0.0156999 |
| cumulative_0v2 | 0.0149369 |
| deliv_mode | 0.0012919 |
| sex_binary | 0.0056130 |
| monthly_income_per_member | 0.0027463 |
| Sequencing_batch | 0.0000874 |
| faith_pd | 1.0000000 |
#calculate effect sizes (eta squared/change in r squared) -- no covariates
eta_cumul_faith_ncovs <- with(alpha_div_cumulative_nocovs_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, faith_pd))$estimate^2)
kable(eta_cumul_faith_ncovs[,ncol(eta_cumul_faith_ncovs)], col.names="R^2 of each variable on Faith's PD") %>% kable_styling()
| R^2 of each variable on Faith’s PD | |
|---|---|
| cumulative_0v1 | 0.0152315 |
| cumulative_0v2 | 0.0169529 |
| faith_pd | 1.0000000 |
#OBSERVED FEATURES
#define models
base_mod_cumulative_feat <- lm(observed_features~cumulative_0v1 + cumulative_0v2, data = alpha_diversity_stress_usable)
cov1_mod_cumulative_feat <- lm(observed_features~cumulative_0v1 + cumulative_0v2+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable)
#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_cumulative_feat, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/observed_cumulative_covs.doc"))
| observed features | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 87.35 | 4.19 | 0.15 | 0.17 | 79.08 – 95.62 | -0.19 – 0.48 | 20.85 | <0.001 |
| cumulative 0v1TRUE | 2.47 | 3.04 | 0.13 | 0.16 | -3.52 – 8.46 | -0.19 – 0.46 | 0.81 | 0.416 |
| cumulative 0v2TRUE | 0.54 | 4.06 | 0.03 | 0.22 | -7.47 – 8.55 | -0.40 – 0.46 | 0.13 | 0.894 |
| deliv mode [1] | -3.15 | 3.08 | -0.17 | 0.17 | -9.24 – 2.94 | -0.50 – 0.16 | -1.02 | 0.308 |
| sex binary [1] | -5.40 | 2.71 | -0.29 | 0.15 | -10.74 – -0.06 | -0.58 – -0.00 | -1.99 | 0.048 |
| monthly income per member | -0.00 | 0.00 | -0.14 | 0.08 | -0.01 – 0.00 | -0.28 – 0.01 | -1.81 | 0.072 |
| Sequencing batch [batch2] | -0.83 | 3.11 | -0.04 | 0.17 | -6.96 – 5.31 | -0.37 – 0.29 | -0.27 | 0.791 |
| Observations | 186 | |||||||
| R2 / R2 adjusted | 0.053 / 0.021 | |||||||
knitr::knit_print(tab_model(base_mod_cumulative_feat, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/observed_cumulative_nocovs.doc"))
| observed features | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 79.00 | 1.83 | -0.05 | 0.10 | 75.39 – 82.61 | -0.24 – 0.15 | 43.11 | <0.001 |
| cumulative 0v1TRUE | 2.17 | 2.77 | 0.12 | 0.15 | -3.30 – 7.64 | -0.18 – 0.42 | 0.78 | 0.435 |
| cumulative 0v2TRUE | 0.11 | 3.66 | 0.01 | 0.20 | -7.11 – 7.33 | -0.39 – 0.40 | 0.03 | 0.976 |
| Observations | 205 | |||||||
| R2 / R2 adjusted | 0.003 / -0.007 | |||||||
#calculate effect sizes (eta squared/change in r squared) -- covariates
eta_cumul_feat_covs <- with(alpha_div_cumulative_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, observed_features))$estimate^2)
kable(eta_cumul_feat_covs[,ncol(eta_cumul_feat_covs)], col.names="R^2 of each variable on Observed Features") %>% kable_styling()
| R^2 of each variable on Observed Features | |
|---|---|
| cumulative_0v1 | 0.0028212 |
| cumulative_0v2 | 0.0000986 |
| deliv_mode | 0.0061449 |
| sex_binary | 0.0209539 |
| monthly_income_per_member | 0.0182633 |
| Sequencing_batch | 0.0004046 |
| observed_features | 1.0000000 |
#calculate effect sizes (eta squared/change in r squared) -- no covariates
eta_cumul_feat_ncovs <- with(alpha_div_cumulative_nocovs_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, observed_features))$estimate^2)
kable(eta_cumul_feat_ncovs[,ncol(eta_cumul_feat_ncovs)], col.names="R^2 of each variable on Observed Features") %>% kable_styling()
| R^2 of each variable on Observed Features | |
|---|---|
| cumulative_0v1 | 0.0022817 |
| cumulative_0v2 | 0.0000043 |
| observed_features | 1.0000000 |
#PIELOU EVENNESS
#define models
base_mod_cumulative0v2_even <- lm(pielou_evenness~cumulative_0v1 + cumulative_0v2, data = alpha_diversity_stress_usable)
cov1_mod_cumulative0v2_even <- lm(pielou_evenness~cumulative_0v1 + cumulative_0v2+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable)
#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(base_mod_cumulative0v2_even, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/even_cumulative_nocovs.doc"))
| pielou evenness | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 0.53 | 0.01 | 0.10 | 0.10 | 0.51 – 0.54 | -0.11 – 0.30 | 59.66 | <0.001 |
| cumulative 0v1TRUE | -0.01 | 0.01 | -0.11 | 0.16 | -0.04 – 0.02 | -0.42 – 0.20 | -0.69 | 0.491 |
| cumulative 0v2TRUE | -0.02 | 0.02 | -0.25 | 0.19 | -0.05 – 0.01 | -0.62 – 0.12 | -1.35 | 0.179 |
| Observations | 205 | |||||||
| R2 / R2 adjusted | 0.009 / -0.001 | |||||||
knitr::knit_print(tab_model(cov1_mod_cumulative0v2_even, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/even_cumulative_covs.doc"))
| pielou evenness | ||||||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | std. Error | std. Beta | standardized std. Error | CI | standardized CI | Statistic | p |
| (Intercept) | 0.53 | 0.02 | 0.14 | 0.16 | 0.50 – 0.56 | -0.18 – 0.45 | 32.38 | <0.001 |
| cumulative 0v1TRUE | -0.00 | 0.01 | -0.05 | 0.17 | -0.03 – 0.02 | -0.38 – 0.29 | -0.27 | 0.789 |
| cumulative 0v2TRUE | -0.02 | 0.02 | -0.22 | 0.20 | -0.05 – 0.02 | -0.62 – 0.18 | -1.07 | 0.286 |
| deliv mode [1] | -0.02 | 0.01 | -0.21 | 0.16 | -0.04 – 0.01 | -0.53 – 0.12 | -1.25 | 0.211 |
| sex binary [1] | 0.02 | 0.01 | 0.21 | 0.15 | -0.01 – 0.04 | -0.08 – 0.50 | 1.40 | 0.163 |
| monthly income per member | 0.00 | 0.00 | 0.00 | 0.07 | -0.00 – 0.00 | -0.14 – 0.14 | 0.02 | 0.985 |
| Sequencing batch [batch2] | -0.03 | 0.02 | -0.36 | 0.19 | -0.06 – 0.00 | -0.73 – 0.01 | -1.95 | 0.053 |
| Observations | 186 | |||||||
| R2 / R2 adjusted | 0.051 / 0.019 | |||||||
#calculate effect sizes (eta squared/change in r squared) -- covariates
eta_cumul_even_covs <- with(alpha_div_cumulative_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, pielou_evenness))$estimate^2)
kable(eta_cumul_even_covs[,ncol(eta_cumul_even_covs)], col.names="R^2 of each variable on Pielou Evenness") %>% kable_styling()
| R^2 of each variable on Pielou Evenness | |
|---|---|
| cumulative_0v1 | 0.0003256 |
| cumulative_0v2 | 0.0055022 |
| deliv_mode | 0.0089445 |
| sex_binary | 0.0106898 |
| monthly_income_per_member | 0.0000018 |
| Sequencing_batch | 0.0263969 |
| pielou_evenness | 1.0000000 |
#calculate effect sizes (eta squared/change in r squared) -- no covariates
eta_cumul_even_ncovs <- with(alpha_div_cumulative_nocovs_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, pielou_evenness))$estimate^2)
kable(eta_cumul_even_ncovs[,ncol(eta_cumul_even_ncovs)], col.names="R^2 of each variable on Pielou Evenness") %>% kable_styling()
| R^2 of each variable on Pielou Evenness | |
|---|---|
| cumulative_0v1 | 0.0019474 |
| cumulative_0v2 | 0.0074880 |
| pielou_evenness | 1.0000000 |
#postnatal adversity
d_postnatal <- alpha_diversity_stress_usable %>% dplyr::select(subjid, postnatal_stress, shannon_entropy, faith_pd, pielou_evenness, observed_features) #grab only needed vars
# Than transform the data to long format as stated already in the comments using reshape function melt
d_postnatal <- d_postnatal %>%
mutate(
postnatal_stress_s = case_when(
postnatal_stress==1 ~ "yes",
postnatal_stress==0 ~ "no"
)
) %>%
dplyr::select(
-postnatal_stress
)
d_postnatal_long <- melt(d_postnatal)
## Using subjid, postnatal_stress_s as id variables
d_postnatal_long_complete <- d_postnatal_long %>% filter(across(.cols=everything(), ~!is.na(.x))) #create complete dataset
#Boxplots
ggplot(d_postnatal_long_complete, aes(y = value, x = postnatal_stress_s, colour = postnatal_stress_s)) +
geom_jitter() +
geom_boxplot(alpha=0.3, outlier.shape=NA) +
xlab("Postnatal Stress Exposure") +
ylab("Value") +
facet_wrap( ~ variable, scales = "free", labeller= as_labeller(c('shannon_entropy' = "Shannon's Index", 'faith_pd' = "Faith's PD", 'pielou_evenness' = "Pielou Evenness", 'observed_features' = "Observed Features"))) +
theme_bw() +
stat_summary(fun=mean, colour='royalblue3', geom='point', shape='-', size=10) +
scale_color_discrete("Postnatal Stress Exposure", labels = c("no (N = 233)", "yes (N= 76)"))
#Prenatal Adversity
grid.arrange(ggplot(alpha_diversity_stress_usable, aes(STAI_prorated_state_pw26, shannon_entropy))+geom_point(size=1)+geom_smooth(method="lm") + labs(x="Prenatal Adversity Score", y= "Shannon Index"),
ggplot(alpha_diversity_stress_usable, aes(STAI_prorated_state_pw26, faith_pd))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Prenatal Adversity Score", y="Faith's PD"),
ggplot(alpha_diversity_stress_usable, aes(STAI_prorated_state_pw26, pielou_evenness))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Prenatal Adversity Score", y="Pielou Evenness"),
ggplot(alpha_diversity_stress_usable, aes(STAI_prorated_state_pw26, observed_features))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Prenatal Adversity Score", y="Observed Features"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
#Preconception adversity
grid.arrange(ggplot(alpha_diversity_stress_usable, aes(ctq_total_score_M54, shannon_entropy))+geom_point(size=1)+geom_smooth(method="lm") + labs(x="Preconception Adversity Score", y= "Shannon Index"),
ggplot(alpha_diversity_stress_usable, aes(ctq_total_score_M54, faith_pd))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Prenatal Adversity Score", y="Faith's PD"),
ggplot(alpha_diversity_stress_usable, aes(ctq_total_score_M54, pielou_evenness))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Preconception Adversity Score", y="Pielou Evenness"),
ggplot(alpha_diversity_stress_usable, aes(ctq_total_score_M54, observed_features))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Preconception Adversity Score", y="Observed Features"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
#cumulative adversity
d_cumulative <- alpha_diversity_stress_usable %>% dplyr::select(subjid, cumulative_adv, shannon_entropy, faith_pd, pielou_evenness, observed_features)
d_cumulative_long <- melt(d_cumulative)
## Using subjid, cumulative_adv as id variables
d_cumulative_long_complete <- d_cumulative_long %>% filter(across(.cols=everything(), ~!is.na(.x))) #create complete dataset
#Boxplots
ggplot(d_cumulative_long_complete, aes(y = value, x = cumulative_adv, colour = cumulative_adv)) +
geom_jitter() +
geom_boxplot(alpha=0.3, outlier.shape=NA) +
xlab("Cumulative Adversity Exposure") +
ylab("Value") +
facet_wrap( ~ variable, scales = "free", labeller= as_labeller(c('shannon_entropy' = "Shannon Index", 'faith_pd' = "Faith's PD", 'pielou_evenness' = "Pielou Evenness", 'observed_features' = "Observed Features"))) +
theme_bw() +
stat_summary(fun=mean, colour='royalblue3', geom='point', shape='-', size=10) +
scale_color_discrete("Cumulative Adversity Exposure", labels = c("0 timepoints", "1 timepoint", "2+ timepoints"))
### Figure 1: Faith’s PD is Lower Among Children with Higher Postnatal
Adversity
#create group means dataset for a quick look at magnitude of effect
faith_means <- alpha_diversity_stress_usable %>%
filter(!is.na(postnatal_stress)) %>%
group_by(postnatal_stress) %>%
summarise(faith_pd = mean(faith_pd))
#add postnatal stress string variable to make plotting easier
alpha_diversity_stress_usable <-
alpha_diversity_stress_usable %>%
mutate(
postnatal_stress_str = case_when(
postnatal_stress == 0 ~ "no",
postnatal_stress == 1 ~ "yes"
)
)
#create residual for faith pd after removing variance associated with covariates
faith_resid <- lm(faith_pd ~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26, data = alpha_diversity_stress_usable)
alpha_diversity_stress_usable_faith <-
alpha_diversity_stress_usable %>%
filter(!is.na(sex_binary) & !is.na(deliv_mode) &!is.na(monthly_income_per_member)&!is.na(Sequencing_batch) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26)) %>%
mutate(
faith_resid = resid(faith_resid)
)
#determine Ns for the graph labels
alpha_diversity_stress_usable_faith %>% dplyr::count(postnatal_stress==1)
## postnatal_stress == 1 n
## 1 FALSE 138
## 2 TRUE 48
## 3 NA 63
#boxplot with faith by postnatal adversity (raw values)
ggplot(alpha_diversity_stress_usable %>% filter(!is.na(postnatal_stress_str)), aes(y = faith_pd, x = postnatal_stress_str, color = postnatal_stress_str)) +
geom_point(position=position_jitter(w=0.3, h=0)) +
geom_boxplot(alpha=0.3, outlier.shape=NA) +
xlab("Postnatal Adversity Exposure") +
ylab("Faith's Phylogenetic Diversity") +
stat_summary(fun=mean, colour='royalblue3', geom='point', shape='-', size=20) +
scale_color_discrete("Postnatal Adversity Exposure", labels = c("no (N = 138)", "yes (N = 48)"))
#boxplot with faith by postnatal adversity (partial correlations)
ggplot(alpha_diversity_stress_usable_faith %>% filter(!is.na(postnatal_stress_str)), aes(y = faith_resid, x = postnatal_stress_str, color = postnatal_stress_str)) +
geom_point(position=position_jitter(w=0.3, h=0)) +
geom_boxplot(alpha=0.3, outlier.shape=NA) +
xlab("Postnatal Adversity Exposure") +
scale_x_discrete("Postnatal Adversity Exposure", labels = c("no" = "No (N=138)", "yes" = "Yes (N=48)")
) +
ylab("Faith's Phylogenetic Diversity (residuals)") +
stat_summary(fun=mean, colour='royalblue3', geom='point', shape='-', size=20) +
theme_minimal() +
theme(#legend.position="none",#hide grid lines
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
)
#save graph with Faith PD residuals for inclusion in manuscript
#ggsave("../../manuscripts/fran_adversity_microbiome/figures/faith_postnat_resid.png", height=4, width=8)
Qiime forum with explanation of PERMANOVA and permdisp tests: https://forum.qiime2.org/t/understanding-beta-group-significance-permanova-results/12648/4
Info about effect size in adonis (R^2): http://qiime.org/tutorials/category_comparison.html
### Covariate Selection Potential covariates: sex_binary, delivery mode,
breastfeeding vars, ethnicity, antibiotics, probiotics, sequencing
batch
physeq_M24_sex <- subset_samples(physeq_M24, sex_binary != "NA") #create a dataset that is complete on sex; necessary for distance matrix calculations to work
#get distance matrices (weighted unifrac, unweighted unifrac, bray-curtis, jaccard)
set.seed(main.seed) #need to run this right before all Unifrac distance matrices calculations
M24_wunifrac_sex <- phyloseq::distance(physeq_M24_sex, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_sex <- phyloseq::distance(physeq_M24_sex, method = "unifrac", type = "samples")
M24_jaccard_sex <- phyloseq::distance(physeq_M24_sex, method = "jaccard", type = "samples")
M24_bc_sex <- phyloseq::distance(physeq_M24_sex, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_sex <- data.frame(sample_data(physeq_M24_sex)) #get data frame of metadata
#perform PERMANOVAs with each distance matrix
set.seed(main.seed) #need to run this before Unifrac distance matrices calculations
test <- adonis(M24_wunifrac_sex~sex_binary, data=metadata_physeq_M24_sex)#ns
set.seed(main.seed)
adonis(M24_unifrac_sex~sex_binary, data=metadata_physeq_M24_sex) #ns
##
## Call:
## adonis(formula = M24_unifrac_sex ~ sex_binary, data = metadata_physeq_M24_sex)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## sex_binary 1 0.184 0.18370 1.2401 0.00267 0.16
## Residuals 463 68.585 0.14813 0.99733
## Total 464 68.769 1.00000
adonis(M24_jaccard_sex~sex_binary, data=metadata_physeq_M24_sex) #ns
##
## Call:
## adonis(formula = M24_jaccard_sex ~ sex_binary, data = metadata_physeq_M24_sex)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## sex_binary 1 0.301 0.30125 0.88172 0.0019 0.681
## Residuals 463 158.191 0.34167 0.9981
## Total 464 158.492 1.0000
adonis(M24_bc_sex~sex_binary, data=metadata_physeq_M24_sex) #ns
##
## Call:
## adonis(formula = M24_bc_sex ~ sex_binary, data = metadata_physeq_M24_sex)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## sex_binary 1 0.235 0.23465 0.89675 0.00193 0.559
## Residuals 463 121.153 0.26167 0.99807
## Total 464 121.388 1.00000
#ANTIBIOTICS
physeq_M24_antibio <- subset_samples(physeq_M24, antibio_M24 != "NA")
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_antibio <- phyloseq::distance(physeq_M24_antibio, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_antibio <- phyloseq::distance(physeq_M24_antibio, method = "unifrac", type = "samples")
M24_jaccard_antibio <- phyloseq::distance(physeq_M24_antibio, method = "jaccard", type = "samples")
M24_bc_antibio <- phyloseq::distance(physeq_M24_antibio, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_antibio <- data.frame(sample_data(physeq_M24_antibio))
#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_antibio~antibio_M24, data=metadata_physeq_M24_antibio) #sig
##
## Call:
## adonis(formula = M24_wunifrac_antibio ~ antibio_M24, data = metadata_physeq_M24_antibio)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## antibio_M24 1 0.224 0.224326 3.3052 0.00709 0.004 **
## Residuals 463 31.424 0.067871 0.99291
## Total 464 31.649 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_unifrac_antibio~antibio_M24, data=metadata_physeq_M24_antibio) #ns
##
## Call:
## adonis(formula = M24_unifrac_antibio ~ antibio_M24, data = metadata_physeq_M24_antibio)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## antibio_M24 1 0.135 0.13542 0.91353 0.00197 0.54
## Residuals 463 68.634 0.14824 0.99803
## Total 464 68.769 1.00000
adonis(M24_jaccard_antibio~antibio_M24, data=metadata_physeq_M24_antibio) #ns
##
## Call:
## adonis(formula = M24_jaccard_antibio ~ antibio_M24, data = metadata_physeq_M24_antibio)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## antibio_M24 1 0.475 0.47496 1.3917 0.003 0.06 .
## Residuals 463 158.018 0.34129 0.997
## Total 464 158.492 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_antibio~antibio_M24, data=metadata_physeq_M24_antibio) #ns
##
## Call:
## adonis(formula = M24_bc_antibio ~ antibio_M24, data = metadata_physeq_M24_antibio)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## antibio_M24 1 0.43 0.42988 1.6455 0.00354 0.053 .
## Residuals 463 120.96 0.26125 0.99646
## Total 464 121.39 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#PROBIOTICS
physeq_M24_probio <- subset_samples(physeq_M24, probiotics_M24 != "NA")
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_probio <- phyloseq::distance(physeq_M24_probio, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_probio <- phyloseq::distance(physeq_M24_probio, method = "unifrac", type = "samples")
M24_jaccard_probio <- phyloseq::distance(physeq_M24_probio, method = "jaccard", type = "samples")
M24_bc_probio <- phyloseq::distance(physeq_M24_probio, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_probio <- data.frame(sample_data(physeq_M24_probio))
#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_probio~probiotics_M24, data=metadata_physeq_M24_probio) #ns
##
## Call:
## adonis(formula = M24_wunifrac_probio ~ probiotics_M24, data = metadata_physeq_M24_probio)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## probiotics_M24 1 0.118 0.11801 1.7393 0.00383 0.117
## Residuals 452 30.668 0.06785 0.99617
## Total 453 30.786 1.00000
adonis(M24_unifrac_probio~probiotics_M24, data=metadata_physeq_M24_probio) #ns
##
## Call:
## adonis(formula = M24_unifrac_probio ~ probiotics_M24, data = metadata_physeq_M24_probio)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## probiotics_M24 1 0.115 0.11527 0.7744 0.00171 0.778
## Residuals 452 67.281 0.14885 0.99829
## Total 453 67.396 1.00000
adonis(M24_jaccard_probio~probiotics_M24, data=metadata_physeq_M24_probio) #ns
##
## Call:
## adonis(formula = M24_jaccard_probio ~ probiotics_M24, data = metadata_physeq_M24_probio)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## probiotics_M24 1 0.366 0.36588 1.0712 0.00236 0.322
## Residuals 452 154.392 0.34158 0.99764
## Total 453 154.758 1.00000
adonis(M24_bc_probio~probiotics_M24, data=metadata_physeq_M24_probio) #ns
##
## Call:
## adonis(formula = M24_bc_probio ~ probiotics_M24, data = metadata_physeq_M24_probio)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## probiotics_M24 1 0.302 0.30243 1.1556 0.00255 0.256
## Residuals 452 118.288 0.26170 0.99745
## Total 453 118.591 1.00000
#DELIVERY MODE
physeq_M24_dm <- subset_samples(physeq_M24, deliv_mode != "NA")
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_dm <- phyloseq::distance(physeq_M24_dm, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_dm <- phyloseq::distance(physeq_M24_dm, method = "unifrac", type = "samples")
M24_jaccard_dm <- phyloseq::distance(physeq_M24_dm, method = "jaccard", type = "samples")
M24_bc_dm <- phyloseq::distance(physeq_M24_dm, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_dm <- data.frame(sample_data(physeq_M24_dm))
#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_dm~deliv_mode, data=metadata_physeq_M24_dm)#ns
##
## Call:
## adonis(formula = M24_wunifrac_dm ~ deliv_mode, data = metadata_physeq_M24_dm)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## deliv_mode 1 0.047 0.046999 0.68859 0.00149 0.671
## Residuals 463 31.602 0.068254 0.99851
## Total 464 31.649 1.00000
set.seed(main.seed)
adonis(M24_unifrac_dm~deliv_mode, data=metadata_physeq_M24_dm) #significant
##
## Call:
## adonis(formula = M24_unifrac_dm ~ deliv_mode, data = metadata_physeq_M24_dm)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## deliv_mode 1 0.318 0.31815 2.1519 0.00463 0.01 **
## Residuals 463 68.451 0.14784 0.99537
## Total 464 68.769 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_jaccard_dm~deliv_mode, data=metadata_physeq_M24_dm) #ns
##
## Call:
## adonis(formula = M24_jaccard_dm ~ deliv_mode, data = metadata_physeq_M24_dm)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## deliv_mode 1 0.336 0.33620 0.98422 0.00212 0.472
## Residuals 463 158.156 0.34159 0.99788
## Total 464 158.492 1.00000
adonis(M24_bc_dm~deliv_mode, data=metadata_physeq_M24_dm) #ns
##
## Call:
## adonis(formula = M24_bc_dm ~ deliv_mode, data = metadata_physeq_M24_dm)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## deliv_mode 1 0.241 0.24074 0.92007 0.00198 0.51
## Residuals 463 121.147 0.26166 0.99802
## Total 464 121.388 1.00000
#ETHNICITY
physeq_M24_eth <- subset_samples(physeq_M24, mother_ethnicity != "NA")
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_eth <- phyloseq::distance(physeq_M24_eth, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_eth <- phyloseq::distance(physeq_M24_eth, method = "unifrac", type = "samples")
M24_jaccard_eth <- phyloseq::distance(physeq_M24_eth, method = "jaccard", type = "samples")
M24_bc_eth <- phyloseq::distance(physeq_M24_eth, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_eth <- data.frame(sample_data(physeq_M24_eth))
#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_eth~mother_ethnicity, data=metadata_physeq_M24_eth) #ns
##
## Call:
## adonis(formula = M24_wunifrac_eth ~ mother_ethnicity, data = metadata_physeq_M24_eth)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## mother_ethnicity 2 0.142 0.071237 1.0446 0.0045 0.395
## Residuals 462 31.506 0.068195 0.9955
## Total 464 31.649 1.0000
adonis(M24_unifrac_eth~mother_ethnicity, data=metadata_physeq_M24_eth) #significant
##
## Call:
## adonis(formula = M24_unifrac_eth ~ mother_ethnicity, data = metadata_physeq_M24_eth)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## mother_ethnicity 2 0.445 0.22238 1.5037 0.00647 0.024 *
## Residuals 462 68.324 0.14789 0.99353
## Total 464 68.769 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_jaccard_eth~mother_ethnicity, data=metadata_physeq_M24_eth) #ns
##
## Call:
## adonis(formula = M24_jaccard_eth ~ mother_ethnicity, data = metadata_physeq_M24_eth)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## mother_ethnicity 2 0.914 0.45698 1.3398 0.00577 0.053 .
## Residuals 462 157.579 0.34108 0.99423
## Total 464 158.492 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_eth~mother_ethnicity, data=metadata_physeq_M24_eth) #significant
##
## Call:
## adonis(formula = M24_bc_eth ~ mother_ethnicity, data = metadata_physeq_M24_eth)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## mother_ethnicity 2 0.78 0.39005 1.4941 0.00643 0.044 *
## Residuals 462 120.61 0.26106 0.99357
## Total 464 121.39 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#SEQUENCING BATCH
physeq_M24_seqba <- subset_samples(physeq_M24, Sequencing_batch != "NA")
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_seqba <- phyloseq::distance(physeq_M24_seqba, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_seqba <- phyloseq::distance(physeq_M24_seqba, method = "unifrac", type = "samples")
M24_jaccard_seqba <- phyloseq::distance(physeq_M24_seqba, method = "jaccard", type = "samples")
M24_bc_seqba <- phyloseq::distance(physeq_M24_seqba, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_seqba <- data.frame(sample_data(physeq_M24_seqba))
#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_seqba~Sequencing_batch, data=metadata_physeq_M24_seqba) #ns
##
## Call:
## adonis(formula = M24_wunifrac_seqba ~ Sequencing_batch, data = metadata_physeq_M24_seqba)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Sequencing_batch 1 0.054 0.053759 0.78599 0.00169 0.565
## Residuals 465 31.805 0.068397 0.99831
## Total 466 31.858 1.00000
adonis(M24_unifrac_seqba~Sequencing_batch, data=metadata_physeq_M24_seqba) #sig
##
## Call:
## adonis(formula = M24_unifrac_seqba ~ Sequencing_batch, data = metadata_physeq_M24_seqba)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Sequencing_batch 1 1.037 1.03698 7.087 0.01501 0.001 ***
## Residuals 465 68.039 0.14632 0.98499
## Total 466 69.076 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_jaccard_seqba~Sequencing_batch, data=metadata_physeq_M24_seqba) #sig
##
## Call:
## adonis(formula = M24_jaccard_seqba ~ Sequencing_batch, data = metadata_physeq_M24_seqba)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Sequencing_batch 1 0.863 0.86319 2.5336 0.00542 0.002 **
## Residuals 465 158.426 0.34070 0.99458
## Total 466 159.289 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_seqba~Sequencing_batch, data=metadata_physeq_M24_seqba) #sig
##
## Call:
## adonis(formula = M24_bc_seqba ~ Sequencing_batch, data = metadata_physeq_M24_seqba)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Sequencing_batch 1 0.836 0.83610 3.2081 0.00685 0.001 ***
## Residuals 465 121.190 0.26062 0.99315
## Total 466 122.026 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#DURATION OF EXCLUSIVE BREASTFEEDING
physeq_M24_ebf <- subset_samples(physeq_M24, only_bf_months != "NA")
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_ebf <- phyloseq::distance(physeq_M24_ebf, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_ebf <- phyloseq::distance(physeq_M24_ebf, method = "unifrac", type = "samples")
M24_jaccard_ebf <- phyloseq::distance(physeq_M24_ebf, method = "jaccard", type = "samples")
M24_bc_ebf <- phyloseq::distance(physeq_M24_ebf, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_ebf <- data.frame(sample_data(physeq_M24_ebf))
#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_ebf~only_bf_months, data=metadata_physeq_M24_ebf)#significant
##
## Call:
## adonis(formula = M24_wunifrac_ebf ~ only_bf_months, data = metadata_physeq_M24_ebf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## only_bf_months 3 0.414 0.137992 2.057 0.0135 0.009 **
## Residuals 451 30.255 0.067084 0.9865
## Total 454 30.669 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_unifrac_ebf~only_bf_months, data=metadata_physeq_M24_ebf) #significant
##
## Call:
## adonis(formula = M24_unifrac_ebf ~ only_bf_months, data = metadata_physeq_M24_ebf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## only_bf_months 3 0.610 0.20323 1.3748 0.00906 0.038 *
## Residuals 451 66.669 0.14783 0.99094
## Total 454 67.279 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_jaccard_ebf~only_bf_months, data=metadata_physeq_M24_ebf) #significant
##
## Call:
## adonis(formula = M24_jaccard_ebf ~ only_bf_months, data = metadata_physeq_M24_ebf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## only_bf_months 3 1.751 0.58356 1.7238 0.01134 0.002 **
## Residuals 451 152.680 0.33854 0.98866
## Total 454 154.431 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_ebf~only_bf_months, data=metadata_physeq_M24_ebf) #significant
##
## Call:
## adonis(formula = M24_bc_ebf ~ only_bf_months, data = metadata_physeq_M24_ebf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## only_bf_months 3 1.554 0.51811 2.0073 0.01318 0.002 **
## Residuals 451 116.407 0.25811 0.98682
## Total 454 117.961 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#DURATION OF ANY BREASTFEEDING
physeq_M24_abf <- subset_samples(physeq_M24, any_bf_months != "NA")
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_abf <- phyloseq::distance(physeq_M24_abf, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_abf <- phyloseq::distance(physeq_M24_abf, method = "unifrac", type = "samples")
M24_jaccard_abf <- phyloseq::distance(physeq_M24_abf, method = "jaccard", type = "samples")
M24_bc_abf <- phyloseq::distance(physeq_M24_abf, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_abf <- data.frame(sample_data(physeq_M24_abf))
#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_abf~any_bf_months, data=metadata_physeq_M24_abf) #sig
##
## Call:
## adonis(formula = M24_wunifrac_abf ~ any_bf_months, data = metadata_physeq_M24_abf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## any_bf_months 4 0.4316 0.107899 1.5972 0.01406 0.048 *
## Residuals 448 30.2657 0.067557 0.98594
## Total 452 30.6973 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(main.seed)
adonis(M24_unifrac_abf~any_bf_months, data=metadata_physeq_M24_abf)
##
## Call:
## adonis(formula = M24_unifrac_abf ~ any_bf_months, data = metadata_physeq_M24_abf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## any_bf_months 4 0.633 0.15833 1.0681 0.00945 0.301
## Residuals 448 66.409 0.14823 0.99055
## Total 452 67.042 1.00000
adonis(M24_jaccard_abf~any_bf_months, data=metadata_physeq_M24_abf)
##
## Call:
## adonis(formula = M24_jaccard_abf ~ any_bf_months, data = metadata_physeq_M24_abf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## any_bf_months 4 1.971 0.49272 1.4525 0.0128 0.003 **
## Residuals 448 151.970 0.33922 0.9872
## Total 452 153.940 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_abf~any_bf_months, data=metadata_physeq_M24_abf)
##
## Call:
## adonis(formula = M24_bc_abf ~ any_bf_months, data = metadata_physeq_M24_abf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## any_bf_months 4 1.789 0.44731 1.7296 0.01521 0.005 **
## Residuals 448 115.859 0.25861 0.98479
## Total 452 117.649 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Determine if breastfeeding variables collinear; if so, only use one
alpha_diversity_stress_usable %$%
ctable(only_bf_months, any_bf_months,
prop = "r", chisq = TRUE, headings = FALSE
) %>%
print(
method = "render",
style = "rmarkdown",
footnote = NA
)
| any_bf_months | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| only_bf_months | <1M | >12M | 1M_to_3M | 3M_to_6M | 6M_to_12M | <NA> | Total | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| <1M | 82 | ( | 24.6% | ) | 39 | ( | 11.7% | ) | 72 | ( | 21.6% | ) | 70 | ( | 21.0% | ) | 65 | ( | 19.5% | ) | 6 | ( | 1.8% | ) | 334 | ( | 100.0% | ) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| 1M_to_3M | 0 | ( | 0.0% | ) | 3 | ( | 21.4% | ) | 0 | ( | 0.0% | ) | 6 | ( | 42.9% | ) | 4 | ( | 28.6% | ) | 1 | ( | 7.1% | ) | 14 | ( | 100.0% | ) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| 3M_to_6M | 0 | ( | 0.0% | ) | 26 | ( | 59.1% | ) | 0 | ( | 0.0% | ) | 4 | ( | 9.1% | ) | 14 | ( | 31.8% | ) | 0 | ( | 0.0% | ) | 44 | ( | 100.0% | ) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| 6M_to_12M | 0 | ( | 0.0% | ) | 36 | ( | 75.0% | ) | 0 | ( | 0.0% | ) | 0 | ( | 0.0% | ) | 12 | ( | 25.0% | ) | 0 | ( | 0.0% | ) | 48 | ( | 100.0% | ) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| <NA> | 0 | ( | 0.0% | ) | 0 | ( | 0.0% | ) | 0 | ( | 0.0% | ) | 0 | ( | 0.0% | ) | 5 | ( | 50.0% | ) | 5 | ( | 50.0% | ) | 10 | ( | 100.0% | ) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Total | 82 | ( | 18.2% | ) | 104 | ( | 23.1% | ) | 72 | ( | 16.0% | ) | 80 | ( | 17.8% | ) | 100 | ( | 22.2% | ) | 12 | ( | 2.7% | ) | 450 | ( | 100.0% | ) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Χ2 = 164.9789 df = 12 p = .0000 |
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Summary
#INCLUDING COVARIATES
#create a phyloseq object with complete cases on adversity variables and covaraites
physeq_M24_all <- subset_samples(physeq_M24, !is.na(postnatal_stress) & !is.na(maltreated_mod_M54) & !is.na(stai_state_above_clin_cutoff_pw26) & !is.na(only_bf_months) & !is.na(deliv_mode) & !is.na(mother_ethnicity) & !is.na(antibio_M24) & !is.na(Sequencing_batch)) #N=204 for this analysis
#calculate distance matrices using this complete cases dataset
set.seed(main.seed)
M24_wunifrac_all <- phyloseq::distance(physeq_M24_all, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_all <- phyloseq::distance(physeq_M24_all, method = "unifrac", type = "samples")
M24_bc_all <- phyloseq::distance(physeq_M24_all, method = "bray", type = "samples")
M24_jaccard_all <- phyloseq::distance(physeq_M24_all, method = "jaccard", type = "samples")
#get metadata
metadata_physeq_M24_all <- data.frame(sample_data(physeq_M24_all)) #get data frame of metadata
#test assumption of betadispersion
disp_all_maltreated <- betadisper(M24_wunifrac_all, metadata_physeq_M24_all$maltreated_mod_M54)
permutest(disp_all_maltreated) #yes, apparently they do have homogeneity of dispersion
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.00716 0.0071600 0.9545 999 0.338
## Residuals 202 1.51526 0.0075013
disp_all_stai <- betadisper(M24_wunifrac_all, metadata_physeq_M24_all$stai_state_above_clin_cutoff_pw26)
permutest(disp_all_stai) #yes, apparently they do have homogeneity of dispersion
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.00027 0.0002712 0.0357 999 0.842
## Residuals 202 1.53481 0.0075981
disp_all_leq <- betadisper(M24_wunifrac_all, metadata_physeq_M24_all$postnatal_stress)
permutest(disp_all_leq) #yes, apparently they do have homogeneity of dispersion
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.00743 0.0074292 0.9784 999 0.313
## Residuals 202 1.53378 0.0075930
#permanovas
#weighted unifrac
set.seed(main.seed)
wunifrac_all_covs <- adonis(M24_wunifrac_all~ stai_state_above_clin_cutoff_pw26 + postnatal_stress+maltreated_mod_M54+ mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_all)
wunifrac_all_covs #print output
##
## Call:
## adonis(formula = M24_wunifrac_all ~ stai_state_above_clin_cutoff_pw26 + postnatal_stress + maltreated_mod_M54 + mother_ethnicity + deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_all)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## stai_state_above_clin_cutoff_pw26 1 0.0296 0.029638 0.45265 0.00221 0.881
## postnatal_stress 1 0.0521 0.052147 0.79642 0.00389 0.561
## maltreated_mod_M54 1 0.0543 0.054297 0.82925 0.00405 0.577
## mother_ethnicity 2 0.2222 0.111085 1.69655 0.01657 0.066
## deliv_mode 1 0.0402 0.040162 0.61337 0.00300 0.721
## only_bf_months 3 0.1802 0.060082 0.91760 0.01344 0.543
## antibio_M24 1 0.1886 0.188649 2.88115 0.01407 0.010
## Sequencing_batch 1 0.0677 0.067720 1.03426 0.00505 0.352
## Residuals 192 12.5716 0.065477 0.93772
## Total 203 13.4066 1.00000
##
## stai_state_above_clin_cutoff_pw26
## postnatal_stress
## maltreated_mod_M54
## mother_ethnicity .
## deliv_mode
## only_bf_months
## antibio_M24 **
## Sequencing_batch
## Residuals
## Total
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(wunifrac_all_covs) #calculates omega squared for first predictor; change order of vars to get omega squared for other predictors
## [1] -0.002660215
#unweighted unifrac
set.seed(main.seed)
unifrac_all_covs <- adonis(M24_unifrac_all~ maltreated_mod_M54+ stai_state_above_clin_cutoff_pw26 + postnatal_stress + mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_all)
unifrac_all_covs #print output
##
## Call:
## adonis(formula = M24_unifrac_all ~ maltreated_mod_M54 + stai_state_above_clin_cutoff_pw26 + postnatal_stress + mother_ethnicity + deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_all)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## maltreated_mod_M54 1 0.1320 0.13201 0.9285 0.00453 0.508
## stai_state_above_clin_cutoff_pw26 1 0.1330 0.13302 0.9356 0.00456 0.482
## postnatal_stress 1 0.1383 0.13830 0.9727 0.00474 0.444
## mother_ethnicity 2 0.2942 0.14711 1.0347 0.01009 0.390
## deliv_mode 1 0.2002 0.20023 1.4083 0.00687 0.115
## only_bf_months 3 0.3860 0.12868 0.9051 0.01324 0.666
## antibio_M24 1 0.1168 0.11684 0.8218 0.00401 0.690
## Sequencing_batch 1 0.4529 0.45294 3.1857 0.01554 0.001
## Residuals 192 27.2981 0.14218 0.93641
## Total 203 29.1517 1.00000
##
## maltreated_mod_M54
## stai_state_above_clin_cutoff_pw26
## postnatal_stress
## mother_ethnicity
## deliv_mode
## only_bf_months
## antibio_M24
## Sequencing_batch ***
## Residuals
## Total
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(unifrac_all_covs)
## [1] -0.0003470041
#bray-curtis
bc_all_covs <- adonis(M24_bc_all~ maltreated_mod_M54+stai_state_above_clin_cutoff_pw26 + postnatal_stress+ mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_all)
bc_all_covs #print output
##
## Call:
## adonis(formula = M24_bc_all ~ maltreated_mod_M54 + stai_state_above_clin_cutoff_pw26 + postnatal_stress + mother_ethnicity + deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_all)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## maltreated_mod_M54 1 0.275 0.27522 1.08687 0.00527 0.326
## stai_state_above_clin_cutoff_pw26 1 0.171 0.17082 0.67458 0.00327 0.837
## postnatal_stress 1 0.158 0.15813 0.62448 0.00303 0.869
## mother_ethnicity 2 0.776 0.38776 1.53129 0.01484 0.036
## deliv_mode 1 0.238 0.23752 0.93797 0.00454 0.513
## only_bf_months 3 1.014 0.33816 1.33543 0.01941 0.084
## antibio_M24 1 0.370 0.37025 1.46216 0.00708 0.122
## Sequencing_batch 1 0.640 0.63989 2.52699 0.01224 0.006
## Residuals 192 48.619 0.25322 0.93031
## Total 203 52.261 1.00000
##
## maltreated_mod_M54
## stai_state_above_clin_cutoff_pw26
## postnatal_stress
## mother_ethnicity *
## deliv_mode
## only_bf_months .
## antibio_M24
## Sequencing_batch **
## Residuals
## Total
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(bc_all_covs)
## [1] 0.0004188679
#jaccard
jacc_all_covs <- adonis(M24_jaccard_all~maltreated_mod_M54+stai_state_above_clin_cutoff_pw26 +postnatal_stress+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_all)
jacc_all_covs #print output
##
## Call:
## adonis(formula = M24_jaccard_all ~ maltreated_mod_M54 + stai_state_above_clin_cutoff_pw26 + postnatal_stress + mother_ethnicity + deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_all)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## maltreated_mod_M54 1 0.352 0.35163 1.04855 0.00511 0.375
## stai_state_above_clin_cutoff_pw26 1 0.265 0.26487 0.78983 0.00385 0.813
## postnatal_stress 1 0.252 0.25197 0.75136 0.00366 0.880
## mother_ethnicity 2 0.923 0.46140 1.37589 0.01341 0.039
## deliv_mode 1 0.333 0.33295 0.99285 0.00484 0.452
## only_bf_months 3 1.228 0.40933 1.22062 0.01785 0.059
## antibio_M24 1 0.422 0.42177 1.25774 0.00613 0.135
## Sequencing_batch 1 0.653 0.65268 1.94630 0.00948 0.002
## Residuals 192 64.386 0.33534 0.93567
## Total 203 68.813 1.00000
##
## maltreated_mod_M54
## stai_state_above_clin_cutoff_pw26
## postnatal_stress
## mother_ethnicity *
## deliv_mode
## only_bf_months .
## antibio_M24
## Sequencing_batch **
## Residuals
## Total
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(jacc_all_covs)
## [1] 0.000235455
#NOT INCLUDING COVARIATES
#create a dataset with complete cases on all adversities
physeq_M24_all_ncovs <- subset_samples(physeq_M24, !is.na(postnatal_stress) & !is.na(maltreated_mod_M54) & !is.na(stai_state_above_clin_cutoff_pw26))
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_all_ncovs <- phyloseq::distance(physeq_M24_all_ncovs, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_all_ncovs <- phyloseq::distance(physeq_M24_all_ncovs, method = "unifrac", type = "samples")
M24_bc_all_ncovs <- phyloseq::distance(physeq_M24_all_ncovs, method = "bray", type = "samples")
M24_jaccard_all_ncovs <- phyloseq::distance(physeq_M24_all_ncovs, method = "jaccard", type = "samples")
#get metadata
metadata_physeq_M24_all_ncovs <- data.frame(sample_data(physeq_M24_all_ncovs)) #get data frame of metadata
#permanovas
#weighted unifrac
set.seed(main.seed)
wunifrac_all_ncovs <- adonis(M24_wunifrac_all_ncovs~ postnatal_stress+stai_state_above_clin_cutoff_pw26+maltreated_mod_M54, data=metadata_physeq_M24_all_ncovs)
wunifrac_all_ncovs #print output
##
## Call:
## adonis(formula = M24_wunifrac_all_ncovs ~ postnatal_stress + stai_state_above_clin_cutoff_pw26 + maltreated_mod_M54, data = metadata_physeq_M24_all_ncovs)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## postnatal_stress 1 0.0448 0.044811 0.66542 0.00321 0.698
## stai_state_above_clin_cutoff_pw26 1 0.0262 0.026193 0.38895 0.00188 0.917
## maltreated_mod_M54 1 0.0627 0.062721 0.93137 0.00450 0.451
## Residuals 205 13.8053 0.067343 0.99041
## Total 208 13.9391 1.00000
omega_sq_adonis(wunifrac_all_ncovs)
## [1] -0.001608689
#unweighted unifrac
set.seed(main.seed)
unifrac_all_ncovs <- adonis(M24_unifrac_all_ncovs~stai_state_above_clin_cutoff_pw26+postnatal_stress+maltreated_mod_M54, data=metadata_physeq_M24_all_ncovs)
unifrac_all_ncovs
##
## Call:
## adonis(formula = M24_unifrac_all_ncovs ~ stai_state_above_clin_cutoff_pw26 + postnatal_stress + maltreated_mod_M54, data = metadata_physeq_M24_all_ncovs)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## stai_state_above_clin_cutoff_pw26 1 0.1347 0.13467 0.93757 0.00451 0.506
## postnatal_stress 1 0.1285 0.12850 0.89463 0.00431 0.546
## maltreated_mod_M54 1 0.1291 0.12908 0.89866 0.00433 0.558
## Residuals 205 29.4454 0.14364 0.98685
## Total 208 29.8376 1.00000
omega_sq_adonis(unifrac_all_ncovs)
## [1] -0.0002990976
#bray-curtis
bc_all_ncovs <- adonis(M24_bc_all_ncovs~postnatal_stress+stai_state_above_clin_cutoff_pw26 +maltreated_mod_M54, data=metadata_physeq_M24_all_ncovs)
bc_all_ncovs
##
## Call:
## adonis(formula = M24_bc_all_ncovs ~ postnatal_stress + stai_state_above_clin_cutoff_pw26 + maltreated_mod_M54, data = metadata_physeq_M24_all_ncovs)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## postnatal_stress 1 0.131 0.13080 0.50103 0.00242 0.944
## stai_state_above_clin_cutoff_pw26 1 0.163 0.16327 0.62543 0.00302 0.876
## maltreated_mod_M54 1 0.292 0.29249 1.12042 0.00541 0.329
## Residuals 205 53.516 0.26105 0.98916
## Total 208 54.102 1.00000
omega_sq_adonis(bc_all_ncovs)
## [1] -0.002396033
#jaccard
jacc_all_ncovs <- adonis(M24_jaccard_all_ncovs~ postnatal_stress+maltreated_mod_M54+stai_state_above_clin_cutoff_pw26, data=metadata_physeq_M24_all_ncovs)
jacc_all_ncovs
##
## Call:
## adonis(formula = M24_jaccard_all_ncovs ~ postnatal_stress + maltreated_mod_M54 + stai_state_above_clin_cutoff_pw26, data = metadata_physeq_M24_all_ncovs)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## postnatal_stress 1 0.233 0.23282 0.68098 0.00328 0.957
## maltreated_mod_M54 1 0.356 0.35637 1.04239 0.00502 0.343
## stai_state_above_clin_cutoff_pw26 1 0.266 0.26574 0.77729 0.00375 0.856
## Residuals 205 70.086 0.34188 0.98795
## Total 208 70.941 1.00000
omega_sq_adonis(jacc_all_ncovs)
## [1] -0.001530048
#get dataset
physeq_M24_cumulative <- subset_samples(physeq_M24, !is.na(cumulative_adv) & !is.na(only_bf_months) & !is.na(deliv_mode) & !is.na(mother_ethnicity) & !is.na(antibio_M24) & !is.na(Sequencing_batch))
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_cumulative <- phyloseq::distance(physeq_M24_cumulative, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_cumulative <- phyloseq::distance(physeq_M24_cumulative, method = "unifrac", type = "samples")
M24_bc_cumulative <- phyloseq::distance(physeq_M24_cumulative, method = "bray", type = "samples")
M24_jaccard_cumulative <- phyloseq::distance(physeq_M24_cumulative, method = "jaccard", type = "samples")
#get metadata
metadata_physeq_M24_cumulative <- data.frame(sample_data(physeq_M24_cumulative)) #get data frame of metadata
#test assumption of betadispersion
disp_cumulative<- betadisper(M24_wunifrac_cumulative, metadata_physeq_M24_cumulative$cumulative_adv)
permutest(disp_cumulative) #yes, apparently they do have homogeneity of dispersion
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.01007 0.0050361 0.6625 999 0.513
## Residuals 201 1.52784 0.0076012
plot(disp_cumulative, hull=FALSE, ellipse=TRUE)
#permanovas
#weighted unifrac
set.seed(main.seed)
wunifrac_cumulative_covs <- adonis(M24_wunifrac_cumulative~cumulative_adv+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_cumulative)
wunifrac_cumulative_covs
##
## Call:
## adonis(formula = M24_wunifrac_cumulative ~ cumulative_adv + mother_ethnicity + deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_cumulative)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## cumulative_adv 2 0.0905 0.045267 0.69075 0.00675 0.747
## mother_ethnicity 2 0.2061 0.103066 1.57273 0.01538 0.095 .
## deliv_mode 1 0.0398 0.039844 0.60800 0.00297 0.725
## only_bf_months 3 0.1683 0.056086 0.85584 0.01255 0.634
## antibio_M24 1 0.1865 0.186534 2.84639 0.01391 0.016 *
## Sequencing_batch 1 0.0673 0.067325 1.02733 0.00502 0.358
## Residuals 193 12.6480 0.065533 0.94341
## Total 203 13.4066 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(wunifrac_cumulative_covs)
## [1] -0.003008632
#unweighted unifrac
set.seed(main.seed)
unifrac_cumulative_covs <- adonis(M24_unifrac_cumulative~cumulative_adv+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_cumulative)
unifrac_cumulative_covs
##
## Call:
## adonis(formula = M24_unifrac_cumulative ~ cumulative_adv + mother_ethnicity + deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_cumulative)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## cumulative_adv 2 0.3462 0.17309 1.2210 0.01188 0.164
## mother_ethnicity 2 0.2942 0.14708 1.0375 0.01009 0.386
## deliv_mode 1 0.1977 0.19766 1.3943 0.00678 0.123
## only_bf_months 3 0.3831 0.12770 0.9008 0.01314 0.676
## antibio_M24 1 0.1158 0.11577 0.8167 0.00397 0.695
## Sequencing_batch 1 0.4552 0.45522 3.2112 0.01562 0.001 ***
## Residuals 193 27.3596 0.14176 0.93852
## Total 203 29.1517 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(unifrac_cumulative_covs)
## [1] 0.002139351
#bray-curtis
bc_cumulative_covs <- adonis(M24_bc_cumulative~cumulative_adv+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_cumulative)
bc_cumulative_covs
##
## Call:
## adonis(formula = M24_bc_cumulative ~ cumulative_adv + mother_ethnicity + deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_cumulative)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## cumulative_adv 2 0.321 0.16065 0.63235 0.00615 0.953
## mother_ethnicity 2 0.732 0.36623 1.44158 0.01402 0.059 .
## deliv_mode 1 0.237 0.23706 0.93314 0.00454 0.520
## only_bf_months 3 0.951 0.31700 1.24780 0.01820 0.144
## antibio_M24 1 0.347 0.34676 1.36495 0.00664 0.161
## Sequencing_batch 1 0.642 0.64175 2.52613 0.01228 0.005 **
## Residuals 193 49.031 0.25404 0.93819
## Total 203 52.261 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(bc_cumulative_covs)
## [1] -0.003557018
#jaccard
jacc_cumulative_covs <- adonis(M24_jaccard_cumulative~cumulative_adv+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_cumulative)
jacc_cumulative_covs
##
## Call:
## adonis(formula = M24_jaccard_cumulative ~ cumulative_adv + mother_ethnicity + deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_cumulative)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## cumulative_adv 2 0.481 0.24034 0.71504 0.00699 0.976
## mother_ethnicity 2 0.892 0.44578 1.32628 0.01296 0.055 .
## deliv_mode 1 0.333 0.33265 0.98969 0.00483 0.457
## only_bf_months 3 1.171 0.39038 1.16144 0.01702 0.114
## antibio_M24 1 0.413 0.41306 1.22892 0.00600 0.155
## Sequencing_batch 1 0.654 0.65387 1.94538 0.00950 0.002 **
## Residuals 193 64.870 0.33611 0.94270
## Total 203 68.813 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(jacc_cumulative_covs)
## [1] -0.002770185
physeq_M24_cumulative_nocovs <- subset_samples(physeq_M24, !is.na(cumulative_adv))
#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_cumulative_ncovs <- phyloseq::distance(physeq_M24_cumulative_nocovs, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_cumulative_ncovs <- phyloseq::distance(physeq_M24_cumulative_nocovs, method = "unifrac", type = "samples")
M24_bc_cumulative_ncovs <- phyloseq::distance(physeq_M24_cumulative_nocovs, method = "bray", type = "samples")
M24_jaccard_cumulative_ncovs <- phyloseq::distance(physeq_M24_cumulative_nocovs, method = "jaccard", type = "samples")
#get metadata
metadata_physeq_M24_cumulative_nocovs <- data.frame(sample_data(physeq_M24_cumulative_nocovs)) #get data frame of metadata
#PERMANOVAS
#weighted unifrac
set.seed(main.seed)
wunifrac_cumulative_ncovs <- adonis(M24_wunifrac_cumulative_ncovs~cumulative_adv, data=metadata_physeq_M24_cumulative_nocovs)
wunifrac_cumulative_ncovs
##
## Call:
## adonis(formula = M24_wunifrac_cumulative_ncovs ~ cumulative_adv, data = metadata_physeq_M24_cumulative_nocovs)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## cumulative_adv 2 0.0966 0.048298 0.71876 0.00693 0.747
## Residuals 206 13.8425 0.067196 0.99307
## Total 208 13.9391 1.00000
omega_sq_adonis(wunifrac_cumulative_ncovs)
## [1] -0.002698585
#unweighted unifrac
set.seed(main.seed)
unifrac_cumulative_ncovs <- adonis(M24_unifrac_cumulative_ncovs~cumulative_adv, data=metadata_physeq_M24_cumulative_nocovs)
unifrac_cumulative_ncovs
##
## Call:
## adonis(formula = M24_unifrac_cumulative_ncovs ~ cumulative_adv, data = metadata_physeq_M24_cumulative_nocovs)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## cumulative_adv 2 0.328 0.16401 1.1449 0.01099 0.229
## Residuals 206 29.510 0.14325 0.98901
## Total 208 29.838 1.00000
omega_sq_adonis(unifrac_cumulative_ncovs)
## [1] 0.001384799
#bray-curtis
bc_cumulative_ncovs <- adonis(M24_bc_cumulative_ncovs~cumulative_adv, data=metadata_physeq_M24_cumulative_nocovs)
bc_cumulative_ncovs
##
## Call:
## adonis(formula = M24_bc_cumulative_ncovs ~ cumulative_adv, data = metadata_physeq_M24_cumulative_nocovs)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## cumulative_adv 2 0.335 0.16765 0.64232 0.0062 0.946
## Residuals 206 53.767 0.26100 0.9938
## Total 208 54.102 1.0000
omega_sq_adonis(bc_cumulative_ncovs)
## [1] -0.003434516
#jaccard
jacc_cumulative_ncovs <- adonis(M24_jaccard_cumulative_ncovs~cumulative_adv, data=metadata_physeq_M24_cumulative_nocovs)
jacc_cumulative_ncovs
##
## Call:
## adonis(formula = M24_jaccard_cumulative_ncovs ~ cumulative_adv, data = metadata_physeq_M24_cumulative_nocovs)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## cumulative_adv 2 0.494 0.24695 0.72214 0.00696 0.982
## Residuals 206 70.447 0.34197 0.99304
## Total 208 70.941 1.00000
omega_sq_adonis(jacc_cumulative_ncovs)
## [1] -0.002666081
Filtering out low prevalence taxa:
In the literature, different filtering thresholds are used based on data
features and research questions (e.g., whether rare taxa are considered
important) (Carlson et
al. (2021) Nature Communications).
For these analyses we chose to report the results after filtering at a
moderate threshold 0.5%, and included a more conservative (0.1%) and
more liberal (1%) threshold in the supplemental material:
Maaslin2 requires as input:
1. A tab-delimited metadata file (features as columns, samples as
rows)
2. A tab-delimited feature file (taxonomy as columns, samples as
rows)
#transform OTU counts to relative abundances
physeq_M24_relabund = transform_sample_counts(physeq_M24, function(x) x / sum(x))
#convert phyloseq OTU table to dataframe
M24_otus = as(otu_table(physeq_M24_relabund), "matrix") #convert from part of phyloseq object to matrix
M24_otus_df = as.data.frame(M24_otus) #convert to df
#provide name for first column (for merging)
M24_ra_otus_df <-
M24_otus_df %>%
rownames_to_column(var = "Feature.ID")
#join taxonomy file to OTU table to get taxa names for OTUs
taxonomy_txt_path <- str_c(general_mb_path, "input_files/M2448_taxonomy_edited.tsv") #taxonomy file path
M2448_tax <- read.table(taxonomy_txt_path, header=TRUE, sep = "\t") #read in taxa
M24_ra_tax_features <-
M2448_tax %>%
full_join(M24_ra_otus_df, by = "Feature.ID")
M24_ra_tax_features <-
M24_ra_tax_features %>%
filter('Feature.ID' != "OTU948") #filter out one OTU/taxa which has NA values for all samples (filtered out for alpha diversity calculations in qiime2 preprocessing)
#remove OTU names
M24_ra_tax_features <-
M24_ra_tax_features %>%
dplyr::select(
-'Feature.ID'
)
#transpose this taxonomy feature file so that taxonomy are columns
M24_ra_tax_features_t <- t(M24_ra_tax_features)
M24_ra_tax_features_t = setNames(data.frame(t(M24_ra_tax_features[,-1])), M24_ra_tax_features[,1])
#name sample ID column for matching with metadata in maaslin2
M24_ra_tax_features_t <-
M24_ra_tax_features_t %>%
rownames_to_column(var = '#SampleID')
#write new file combining taxonomy and OTU counts as .tsv
tax_features_ra_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/M24_tax_features_ra.tsv"
write_tsv(M24_ra_tax_features_t, tax_features_ra_fp)
#also write these data to form B data folder for submission with GUSTO form B application
write_csv(M24_ra_tax_features_t, "../../manuscripts/fran_adversity_microbiome/Form_B_submission/data_updated/microbiome_taxonomy_and_counts.csv")
#read in metadata as dataframe
M24_metadata <- read_tsv(str_c(general_mb_path, "input_files/M24_metadata_typeheader_new.tsv"))
#remove typeheader so that variable type can be read correctly by R
M24_metadata <- M24_metadata[-1, ]
#write metadata file with M24 samples as .tsv
M24_meta_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/M24_metadata.tsv"
write_tsv(M24_metadata, M24_meta_fp)
#manually open the .tsv and save it as .csv
csv_file <- "../../data/analysis_datasets/fran/maaslin2/inputs/M24_metadata.csv"
#read .csv into R -- after removing the typeheader, this will automatically assign each variable to the correct type
test <- read_csv(csv_file)
#manually assign correct variable type to binary variables that were read as double (numeric with decimal place) so that maaslin2 will treat those variables as categorical
test <- test %>%
dplyr::mutate(across(.cols = c(maltreated_mod_M54, stai_state_above_clin_cutoff_pw26, antibio_M24, probiotics_M24, cumulative_adv), ~as.character(.x)))
#create new descriptive variables for easier visualization when graphing
test <- test %>%
mutate(sex_binary_ch = case_when(
sex_binary == 1 ~ "male",
sex_binary== 0 ~ "female"
),
postnatal_stress_s = case_when(
postnatal_stress==1 ~ "yes",
postnatal_stress==0 ~ "no"
))
#remove variables that won't be used in maaslin2 analyses
test <- test %>%
dplyr::select(
-contains("mom"),
-sum_advrsty,
-deliv_mode
)
#write new metadata file with correct dadta types back to original file path
write_tsv(test, "../../data/analysis_datasets/fran/maaslin2/inputs/M24_metadata.tsv")
#load final metadata file for analysis below
M24_metadata <- read_tsv(M24_meta_fp)
Covariates will be chosen based on overlap in significantly different bacteria with adversity maaslin results without covariates at the genus level.
Possible covariates include: sequencing batch, antibiotics, probiotics, sex, delivery mode, ethnicity, duration of exclusive breastfeeding, income, gestational age at birth, age at stool sample proxy.
Same Maaslin2 default parameters as used above and 0.5% filtering threshold.
#child sex
maaslin2_sex_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/sex/sex_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("sex_binary_ch"),
standardize = TRUE
)
#antibiotic use
maaslin2_antibiotics_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/antibiotics/antibiotiics_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("antibio_M24"),
standardize = TRUE
)
#probiotic use
maaslin2_probiotics_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/probiotics/probiotics_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("probiotics_M24"),
standardize = TRUE
)
#delivery mode
maaslin2_delivmode_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/delivery_mode/delivmode_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("deliv_mode_str"),
standardize = TRUE
)
#child ethnicity
maaslin2_ethnicity_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/ethnicity/ethnicity_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#age at stool sample
maaslin2_age_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/age/age_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("c_age_days_stoolproxy_m24"),
standardize = TRUE
)
#monthly household income per member
maaslin2_income_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/income/income_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("monthly_income_per_member"),
standardize = TRUE
)
#gestational age at birth
maaslin2_ga_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/gestational_age/ga_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("GA"),
standardize = TRUE
)
#duration of exclusive breastfeeding
maaslin2_exclusivebf_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/duration_of_exclusive_bf/exclusivebf_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("only_bf_months"),
standardize = TRUE,
reference = 'only_bf_months, c(0, 1, 2, 4)' #provide reference level for variables with more than 2 categories
)
#duration of any breastfeeding
maaslin2_anybf_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/duration_of_any_bf/anybf_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("any_bf_months"),
standardize = TRUE,
reference = 'any_bf_months, c(0, 1, 2, 3)'
)
#sequencing batch
maaslin2_seqbatch_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/sequencing_batch/seqbatch_0.5p_results",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("Sequencing_batch"),
standardize = TRUE
)
Significant covariates for maaslin2 adversity analyses:
Note: to perform the MaAsLin2 analyses, both an OTU file with all of the microbiome data, and a metadata file with all of the variables that will be associated with the microbiome data are needed. At the time of running these analyses, MaAsLin2 calculated the sample size, and the sample size with non-zero taxa abundance based on the OTU/taxonomy file. However, when a sample was missing some metadata, that sample would not be included in the analysis with the metadata variable (due to listwise deletion) and thus, the sample sizes would differ from that of the OTU file. This issue was brought to the attention of the developers. A simple fix to this problem would be to run MaAsLin2 on datasets with complete cases, to obtain the correct sample size. However, as we wanted to filter the data for non-zero abundance based on all samples with usable gut microbiome data, instead of subsets of the dataset based on metadata availability, we took the approach of running MaAsLin2 with all the cases included (in which instance, the sample sizes reported are incorrect) and again with complete cases for each metadata variable (in which instance the filtering for non zero abundance and thus, the results are incorrect, but the sample sizes are correct)
To reduce space, instead of writing out each version separately below, we ran each code chunk twice, swapping out the metadata file for all vs. complete cases and changing the folder name for the results (adding “_allcases” and “_completecases” for 1 and 2 respectively). Analysis objects below are saved using the all cases version for use in visualizations later, as that is the version with the coefficient estimates that we used.
#ALL ADVERSITIES TOGETHER
#check N for all adversities analyses
alpha_diversity_stress_usable %>% dplyr::count(
!is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54) & !is.na(postnatal_stress) #N is 205
)
#create complete cases dataset to determine N and N.not.0
alladv_metadata_nocovs <- M24_metadata %>%
filter(!is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54) & !is.na(postnatal_stress))
write_tsv(alladv_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_alladv_nocovs.tsv")
alladv_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_alladv_nocovs.tsv"
#All adversities -- 0.5% filtering
maaslin2_alladv_nocov_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/all_adversities/alladv_nocovs_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("postnatal_stress", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
standardize = TRUE
)
#CUMULATIVE ADVERSITY
#create complete cases dataset to determine N and N.not.0
cumulative_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cumulative_adv_s))
write_tsv(cumulative_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_cumulative_nocovs.tsv")
cumulative_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_cumulative_nocovs.tsv"
#Cumulative adversity -- 1% filtering
maaslin2_cumulative_nocov_1 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/adversity_cumulativeicity/cumulative_nocovs_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("cumulative_adv_s"),
standardize = TRUE,
reference = c("cumulative_adv_s,0 tp") #provide reference level for variables with more than 2 categories
)
#Cumulative adversity -- 0.5% filtering
maaslin2_cumulative_nocov_0.5 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/cumulative_adversity/cumulative_nocovs_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("cumulative_adv_s"),
standardize = TRUE,
reference = c("cumulative_adv_s,0 tp")
)
#Cumulative adversity -- 0.1% filtering
maaslin2_cumulative_nocov_0.1 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/adversity_cumulativeicity/cumulative_nocovs_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cumulative_adv_s"),
standardize = TRUE,
reference = c("cumulative_adv_s,0 tp")
)
#PRECONCEPTION ADVERSITY
#create complete cases dataset to determine correct N and N.not.0
precon_metadata_nocovs <- M24_metadata %>%
filter(!is.na(ctq_total_score_M54)) #N=279
write_tsv(precon_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_precon_nocovs.tsv")
precon_meta_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_precon_nocovs.tsv"
#preconception adversity -- 0.5% filtering
maaslin_mothermalt_nocov_05 <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/mother_maltreatment/maltreated_nocovs_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005, #0.5% filtering threshold
max_significance = 0.25, #q-value threshold recommended for biomarker discovery
#defaults: normalization = TSS and transform = log
analysis_method = "lm", #default
fixed_effects = c("ctq_total_score_M54"),
standardize = TRUE #this option only affects the analysis when there is greater than one predictor
)
#PRENATAL ADVERSITY
#create complete cases dataset to determine N and N.not.0
prenat_metadata_nocovs <- M24_metadata %>%
filter(!is.na(STAI_prorated_state_pw26))
write_tsv(prenat_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_prenat_nocovs.tsv")
prenat_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_prenat_nocovs.tsv"
#Prenatal adversity -- 0.5% filtering
maaslin_prenatalanx_nocov_05 <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/prenatal_anxiety/prenatalanx_nocovs_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("STAI_prorated_state_pw26"),
standardize = TRUE
)
#POSTNATAL ADVERSITY
postnat_metadata_nocovs <- M24_metadata %>%
filter(!is.na(STAI_prorated_state_pw26))
write_tsv(postnat_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_postnat_nocovs.tsv")
postnat_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_postnat_nocovs.tsv"
#postnatal adversity - 0.5% filtering
maaslin2_postnatalstress_nocov_05 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/no_covs/postnatal_stress/poststress_nocovs_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("postnatal_stress"),
standardize = TRUE
)
The same process as described in the “without covariates” section is followed, in which analyses are run once with all cases (to get coefficient estimates) and once with complete cases (to get correct N and N.not.0). The same Maaslin2 default parameters as used above, and 0.1%, 0.5%, and 1% filtering thresholds are used.
#ALL ADVERSITIES TOGETHER
#create all adversity metadata
alladv_metadata <- M24_metadata %>%
filter(!is.na(postnatal_stress) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54) & !is.na(mother_ethnicity) & !is.na(only_bf_months) & !is.na(probiotics_M24) & !is.na(Sequencing_batch) & !is.na(c_age_years_stoolproxy_m24)) #N=196
write_tsv(alladv_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_alladv.tsv")
alladv_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_alladv.tsv"
glimpse(alladv_metadata)
#All adversities -- 1% filtering threshold
maaslin2_alladv_withcov_1 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/all_adversities/alladv_wcovs_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("postnatal_stress_s", "STAI_prorated_state_pw26", "ctq_total_score_M54", "mother_ethnicity", "only_bf_months", "c_age_years_stoolproxy_m24", "Sequencing_batch"),
standardize = TRUE,
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
#All adversities -- 0.5% filtering threshold
maaslin2_alladv_withcov_05 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/all_adversities/alladv_wcovs_0.5p_results_allcases_updated",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("postnatal_stress_s", "STAI_prorated_state_pw26", "ctq_total_score_M54", "mother_ethnicity", "only_bf_months", "c_age_years_stoolproxy_m24", "Sequencing_batch"),
standardize = TRUE,
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
#All adversities -- 0.1% filtering threshold
maaslin2_alladv_withcov_01 = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = alladv_metadata_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/all_adversities/alladv_wcovs_0.1p_results_completecases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("postnatal_stress_s", "STAI_prorated_state_pw26", "ctq_total_score_M54", "mother_ethnicity", "only_bf_months", "c_age_years_stoolproxy_m24", "Sequencing_batch"),
standardize = TRUE,
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
There were no selected covariates for cumulative adversity, so we only ran analyses for cumulative adversity not including covariates.
#PRECONCEPTION ADVERSITY
#create complete metadata files to be used for these
precon_metadata <- M24_metadata %>%
filter(!is.na(ctq_total_score_M54) & !is.na(Sequencing_batch) & !is.na(only_bf_months) & !is.na(mother_ethnicity))
write_tsv(precon_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_precon.tsv")
precon_meta_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_precon.tsv"
#preconception adversity - 1% filtering threshold
maaslin_maltreatment_covs_1p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/with_covs/mother_maltreatment/mothermalt_wcovs_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity", "only_bf_months", "ctq_total_score_M54", "Sequencing_batch"),
standardize = TRUE, #because we have multiple continuous metadata variables
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
#preconception adversity -- 0.5% filtering threshold
maaslin_maltreatment_covs_0.5p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/mother_maltreatment/mothermalt_wcovs_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity", "only_bf_months", "ctq_total_score_M54", "Sequencing_batch"),
standardize = TRUE,
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
#preconception adversity -- 0.1% filtering threshold
maaslin_maltreatment_covs_0.1p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = precon_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/mother_maltreatment/mothermalt_wcovs_0.1p_results_completecases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity", "only_bf_months", "ctq_total_score_M54", "Sequencing_batch"),
standardize = TRUE,
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
#PRENATAL ADVERSITY
#create prenatal adversity metadata
prenat_metadata <- M24_metadata %>%
filter(!is.na(STAI_prorated_state_pw26) & !is.na(Sequencing_batch) & !is.na(only_bf_months) & !is.na(probiotics_M24) & !is.na(mother_ethnicity)) #N=420
write_tsv(prenat_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_prenat.tsv")
prenat_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_prenat.tsv"
#prenatal adversity -- 1% filtering threshold
maaslin_prenatanx_covs_1p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/prenatal_anxiety/prenatanx_wcovs_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity", "only_bf_months", "probiotics_M24", "STAI_prorated_state_pw26", "Sequencing_batch"),
standardize = TRUE,
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
#prenatal adversity -- 0.5% filtering threshold
maaslin_prenatanx_covs_0.5p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/prenatal_anxiety/prenatanx_wcovs_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("mother_ethnicity", "only_bf_months", "probiotics_M24", "STAI_prorated_state_pw26", "Sequencing_batch"),
standardize = TRUE,
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
#prenatal adversity -- 0.1% filtering threshold
maaslin_prenatanx_covs_0.1p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/prenatal_anxiety/prenatanx_wcovs_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity", "only_bf_months", "probiotics_M24", "STAI_prorated_state_pw26", "Sequencing_batch"),
standardize = TRUE,
reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)')
)
#POSTNATAL ADVERSITY
#create postnatal adversity metadata
postnat_metadata <- M24_metadata %>%
filter(!is.na(postnatal_stress) & !is.na(mother_ethnicity)) #N=310
write_tsv(postnat_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_postnat.tsv")
postnat_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_postnat.tsv"
#postnatal adversity -- 1% filtering threshold
maaslin_poststress_covs_1p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/postnatal_stress/poststress_wcovs_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity", "postnatal_stress_s"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#Postnatal adversity -- 0.05% filtering threshold
maaslin_poststress_covs_0.5p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/postnatal_stress/poststress_wcovs_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity", "postnatal_stress_s"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#Postnatal adversity -- 0.01% filtering threshold
maaslin_poststress_covs_0.1p <- Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/postnatal_stress/poststress_wcovs_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("mother_ethnicity", "postnatal_stress"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
Note: Covariates will be chosen based on overlap in these results (at the genus level) with the DA analyses for possible covariates that were run in the “Covariate Selection” chunks above.
The same process as described in the adversity differential abundance sections is followed, in which analyses are run once with all cases (to get coefficient estimates) and once with complete cases (to get correct N and N.not.0). The same Maaslin2 default parameters are used as well.
#INTERNALIZING PROBLEMS
#create complete cases dataset to determine N and N.not.0
int2_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbclintprobtot_m24_std) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26)) #N=151
write_tsv(int2_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_int2_nocovs.tsv")
int2_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_int2_nocovs.tsv"
# Internalizing problems - 0.5% filtering threshold
maaslin_int_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/no_covs/int_raw_0.5p_results_nocovs_adversities_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("cbclintprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26"), #correlated adversity variables are included
standardize = TRUE
)
#DEVELOPMENTAL PROBLEMS
#create complete cases dataset to determine N and N.not.0
dev2_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbcldevprobtot_m24_std) &!is.na(STAI_prorated_state_pw26))
write_tsv(dev2_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_dev2_nocovs.tsv")
dev2_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_dev2_nocovs.tsv"
#developmental problems - 0.5% filtering threshold
maaslin_dev2_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/no_covs/dev_raw_0.5p_results_nocovs_adversities_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("cbcldevprobtot_m24_std", "STAI_prorated_state_pw26"),
standardize = TRUE
)
#TOTAL PROBLEMS
#create complete cases dataset to determine N and N.not.0
tot2_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbcltotprobtot_m24_std) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26)) #N=151
write_tsv(tot2_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_tot2_nocovs.tsv")
tot2_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_tot2_nocovs.tsv"
#Total problems - 0.5% filtering threshold
maaslin_tot_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/no_covs/tot_raw_0.5p_results_nocovs_adversities_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("cbcltotprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26"),
standardize = TRUE
)
Selected covariates:
#ADHD PROBLEMS
adhd4_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbcladhdprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54))
write_tsv(adhd4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_adhd4_nocovs.tsv")
adhd4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_adhd4_nocovs.tsv"
#ADHD problems - 0.5% filtering threshold
maaslin_adhd4_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/adhd_raw_0.5p_results_nocovs_adversities_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("cbcladhdprobtot_y4_std", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
standardize = TRUE
)
#ATTENTION PROBLEMS
attn4_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbclattprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54))
write_tsv(attn4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_attn4_nocovs.tsv")
attn4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_attn4_nocovs.tsv"
#Attn problems - 0.5% filtering threshold
maaslin_att4_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/attn_raw_0.5p_results_nocovs_adversities_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbclattprobtot_y4_std", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
standardize = TRUE
)
#DEVELOPMENTAL PROBLEMS
#create complete cases dataset to determine N and N.not.0
dev4_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbcldevprobtot_y4_std) & !is.na(STAI_prorated_state_pw26)) #N=289
write_tsv(dev4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_dev4_nocovs.tsv")
dev4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_dev4_nocovs.tsv"
#Dev problems - 0.5% filtering threshold
maaslin_dev4_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = dev4_metadata_nocovs_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/dev_raw_0.5p_results_nocovs_adversities_completecases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbcldevprobtot_y4_std", "STAI_prorated_state_pw26"),
standardize = TRUE
)
#EXTERNALIZING PROBLEMS
ext4_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbclextprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54))
write_tsv(ext4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_ext4_nocovs.tsv")
ext4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_ext4_nocovs.tsv"
#Externalizing - 0.5% filtering threshold
maaslin_ext4_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/ext_raw_0.5p_results_nocovs_adversities_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbclextprobtot_y4_std", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
standardize = TRUE
)
#INTERNALIZING PROBLEMS
#create complete cases dataset to determine N and N.not.0
int4_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbclintprobtot_y4_std) & !is.na(STAI_prorated_state_pw26)) #N=289
write_tsv(int4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_int4_nocovs.tsv")
int4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_int4_nocovs.tsv"
#Internalizing problems - 0.5% filtering threshold
maaslin_int4_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/int_raw_0.5p_results_nocovs_adversities_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("cbclintprobtot_y4_std", "STAI_prorated_state_pw26"),
standardize = TRUE
)
#SLEEP PROBLEMS
#create complete cases dataset to determine N and N.not.0
sleep4_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbclsleepprobtot_y4_std) & !is.na(STAI_prorated_state_pw26)) #N=289
write_tsv(sleep4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_sleep4_nocovs.tsv")
sleep4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_sleep4_nocovs.tsv"
#Sleep problems - 0.5% filtering threshold
maaslin_sleep4_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = sleep4_metadata_nocovs_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/sleep_raw_0.5p_results_nocovs_adversities_completecases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbclsleepprobtot_y4_std", "STAI_prorated_state_pw26"),
standardize = TRUE
)
#TOTAL PROBLEMS
tot4_metadata_nocovs <- M24_metadata %>%
filter(!is.na(cbcltotprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54))
write_tsv(tot4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_tot4_nocovs.tsv")
tot4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_tot4_nocovs.tsv"
#Total problems - 0.5% filtering threshold
maaslin_tot4_0.5p_nocovs = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/tot_raw_0.5p_results_nocovs_adversities_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("cbcltotprobtot_y4_std", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
standardize = TRUE
)
Selected covariates:
The same process as described in the adversity differential abundance sections is followed, in which analyses are run once with all cases (to get coefficient estimates) and once with complete cases (to get correct N and N.not.0). The same Maaslin2 default parameters are used as well. #### Developmental Problems
#DEVELOPMENTAL PROBLEMS
dev2_metadata <- M24_metadata %>%
filter(!is.na(cbcldevprobtot_m24_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(deliv_mode_str) & !is.na(only_bf_months) & !is.na(Sequencing_batch)) #N=166
write_tsv(dev2_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_devM24.tsv")
dev2_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_devM24.tsv"
#developmental problems - 1% filtering threshold
maaslin_dev2_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/dev_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbcldevprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch", "deliv_mode_str", "only_bf_months", "Sequencing_batch"),
standardize = TRUE
)
#developmental problems - 0.5% filtering threshold
maaslin_dev2_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/dev_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbcldevprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch", "deliv_mode_str", "only_bf_months", "Sequencing_batch"),
standardize = TRUE
)
#developmental problems - 0.1% filtering threshold
maaslin_dev2_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/dev_raw_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbcldevprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch", "deliv_mode_str", "only_bf_months", "Sequencing_batch"),
standardize = TRUE
)
#INTERNALIZING PROBLESM
#create metadata file with complete cases for internalizing
int2_metadata <- M24_metadata %>%
filter(!is.na(cbclintprobtot_m24_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary)) #N=171
write_tsv(int2_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_intM24.tsv")
int2_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_intM24.tsv"
# Internalizing problems - 0.5% filtering threshold
maaslin_int_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/int_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbclintprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch"),
standardize = TRUE
)
# Internalizing problems - 1% filtering threshold
maaslin_int_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/int_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbclintprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch"),
standardize = TRUE
)
# Internalizing problems - 0.1% filtering threshold
maaslin_int_0.1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/int_raw_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbclintprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch"),
standardize = TRUE
)
#TOTAL PROBLEMS
#create metadata file with complete cases for total
tot2_metadata <- M24_metadata %>%
filter(!is.na(cbcltotprobtot_m24_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(Sequencing_batch) & !is.na(ctq_total_score_M54)) #N=150
write_tsv(tot2_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_totM24.tsv")
tot2_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_totM24.tsv"
#Total problems - 0.5% filtering threshold
maaslin_tot_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/tot_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbcltotprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26", "sex_binary_ch", "Sequencing_batch"),
standardize = TRUE
)
#Total problems - 1% filtering threshold
maaslin_tot_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/tot_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbcltotprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26", "sex_binary_ch", "Sequencing_batch"),
standardize = TRUE
)
#Total problems - 0.1% filtering threshold
maaslin_tot_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/tot_raw_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("cbcltotprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26", "sex_binary_ch", "Sequencing_batch"),
standardize = TRUE
)
The same process as described in the adversity differential abundance sections is followed, in which analyses are run once with all cases (to get coefficient estimates) and once with complete cases (to get correct N and N.not.0). The same Maaslin2 default parameters are used as well. #### ADHD Problems
#ADHD PROBLEMS
adhd4_metadata <- M24_metadata %>%
filter(!is.na(cbcladhdprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(ctq_total_score_M54)) #N=201
write_tsv(adhd4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_adhdY4.tsv")
adhd4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_adhdY4.tsv"
#ADHD problems - 0.5% filtering threshold
maaslin_adhd4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/adhd_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcladhdprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#ADHD problems - 1% filtering threshold
maaslin_adhd4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/adhd_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcladhdprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#ADHD problems - 0.1% filtering
maaslin_adhd4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/adhd_raw_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcladhdprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#ATTENTION PROBLEMS
attn4_metadata <- M24_metadata %>%
filter(!is.na(cbclattprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(ctq_total_score_M54)) #N=202
write_tsv(attn4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_attnY4.tsv")
attn4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_attnY4.tsv"
#Attention problems - 0.5% filtering threshold
maaslin_att4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/attn_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclattprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#Attention problems - 1% filtering threshold
maaslin_att4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/attn_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclattprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#Attention problems - 0.1% filtering threshold
maaslin_att4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/attn_raw_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclattprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#DEVELOPMENTAL PROBLEMS
dev4_metadata <- M24_metadata %>%
filter(!is.na(cbcldevprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary)) #N=289
write_tsv(dev4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_devY4.tsv")
dev4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_devY4.tsv"
#Developmental problems - 0.5% filtering threshold
maaslin_dev4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/dev_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "cbcldevprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#Developmental problems - 0.1% filtering threshold
maaslin_dev4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = dev4_metadata_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/dev_raw_0.1p_results_completecases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "cbcldevprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#Developmental problems - 1% filtering threshold
maaslin_dev4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = dev4_metadata_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/dev_raw_0.1p_results_completecases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "cbcldevprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#EXTERNALIZING PROBLEMS
ext4_metadata <- M24_metadata %>%
filter(!is.na(cbclattprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(ctq_total_score_M54)) #N=202
write_tsv(ext4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_extY4.tsv")
ext4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_extY4.tsv"
#Externalizing - 0.5% filtering threshold
maaslin_ext4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/ext_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclextprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#Externalizing - 1% filtering threshold
maaslin_ext4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/ext_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclextprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#Externalizing - 0.1% filtering threshold
maaslin_ext4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/ext_raw_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclextprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#INTERNALIZING PROBLEMS
#get complete cases metadata file
int4_metadata <- M24_metadata %>%
filter(!is.na(cbclintprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(only_bf_months) & !is.na(Sequencing_batch) & !is.na(mother_ethnicity)) #N=282
write_tsv(int4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_intY4.tsv")
int4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_intY4.tsv"
#Internalzing problems - 0.5% filtering threshold
maaslin_int4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/int_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "cbclintprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "Sequencing_batch"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#Internalzing problems - 1% filtering threshold
maaslin_int4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/int_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "cbclintprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "Sequencing_batch"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#Internalzing problems - 0.1% filtering threshold
maaslin_int4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/int_raw_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "cbclintprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "Sequencing_batch"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#SLEEP PROBLEMS
sleep4_metadata <- M24_metadata %>%
filter(!is.na(cbclattprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(only_bf_months) & !is.na(mother_ethnicity) & !is.na(probiotics_M24) & !is.na(Sequencing_batch)) #N=276
write_tsv(sleep4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_sleepY4.tsv")
sleep4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_sleepY4.tsv"
#Sleep problems - 0.5% filtering threshold
maaslin_sleep4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/sleep_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("STAI_prorated_state_pw26", "cbclsleepprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "probiotics_M24", "Sequencing_batch"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#Sleep problems - 1% filtering threshold
maaslin_sleep4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/sleep_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "cbclsleepprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "probiotics_M24", "Sequencing_batch"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#Sleep problems - 0.1% filtering threshold
maaslin_sleep4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/sleep_raw_0.1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm", #default
fixed_effects = c("STAI_prorated_state_pw26", "cbclsleepprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "probiotics_M24", "Sequencing_batch"),
standardize = TRUE,
reference = 'mother_ethnicity, c(0, 1, 2)'
)
#TOTAL PROBLEMS
tot4_metadata <- M24_metadata %>%
filter(!is.na(cbcltotprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) ) #N=288
write_tsv(tot4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_totY4.tsv")
tot4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_totY4.tsv"
#Total problems - 0.5% filtering threshold
maaslin_tot4_0.5p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/tot_raw_0.5p_results_allcases",
min_abundance = 0,
min_prevalence = 0.005,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcltotprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#Total problems - 1% filtering threshold
maaslin_tot4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = M24_meta_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/tot_raw_1p_results_allcases",
min_abundance = 0,
min_prevalence = 0.01,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcltotprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
#Total problems - 0.1% filtering threshold
maaslin_tot4_1p = Maaslin2(
input_data = tax_features_ra_fp,
input_metadata = tot4_metadata_fp,
output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/tot_raw_0.1p_results_completecases",
min_abundance = 0,
min_prevalence = 0.001,
max_significance = 0.25,
analysis_method = "lm",
fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcltotprobtot_y4_std", "sex_binary_ch"),
standardize = TRUE
)
For graphing below
Only including taxa with N.not.0 >= 15% of all samples in the
analysis, and datasets at 0.5% filtering threshold
#Combine Adversity Results
#find cutoff Ns for each analysis
ctq_n <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(ctq_total_score_M54) & !is.na(mother_ethnicity) & !is.na(only_bf_months) & !is.na(Sequencing_batch))
stai_n <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(STAI_prorated_state_pw26) & !is.na(mother_ethnicity) & !is.na(only_bf_months) & !is.na(Sequencing_batch) & !is.na(probiotics_M24))
leq_n <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(postnatal_stress) & !is.na(mother_ethnicity))
all_adv <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(postnatal_stress) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(mother_ethnicity) &!is.na(only_bf_months)&!is.na(Sequencing_batch) &!is.na(c_age_years_stoolproxy_m24))
cumu_n <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(cumulative_adv))
#grab results of interest from all cases run (so N and N.not.0 are incorrect, but coefficient estimates are correct)
ctq_results <- maaslin_maltreatment_covs_0.5p$results %>%
dplyr::filter(metadata=="ctq_total_score_M54" & qval < .25 & N.not.zero >= .15*ctq_n$n[2]) %>%
mutate(
analysis = "preconception"
)
stai_results <- maaslin_prenatanx_covs_0.5p$results %>%
dplyr::filter(metadata=="STAI_prorated_state_pw26" & qval < .25 & N.not.zero >= .15*stai_n$n[2]) %>%
mutate(
analysis = "prenatal"
)
leq_results <- maaslin_poststress_covs_0.5p$results %>%
dplyr::filter(metadata=="postnatal_stress_s" & qval < .25 & N.not.zero >= .15*leq_n$n[2]) %>%
mutate(
analysis = "postnatal"
)
alladv_results <- maaslin2_alladv_withcov_05$results %>%
dplyr::filter((metadata=="ctq_total_score_M54" | metadata=="STAI_prorated_state_pw26" | metadata=="postnatal_stress_s") & qval < .25 & N.not.zero >=.15*all_adv$n[2]) %>%
mutate(
analysis = "all_adv"
)
#no significant results for cumulative, so do not include here
adversity_results <- ctq_results %>%
bind_rows(stai_results) %>%
bind_rows(leq_results) %>%
bind_rows(alladv_results)
#Combine Socioemotional Functioning Results
#Ns hardcoded from complete cases analyses to save space
#grab results of interest from all cases run (so N and N.not.0 are incorrect, but coefficient estimates are correct)
maaslin_int2_results <- maaslin_int_0.5p$results %>%
dplyr::filter(metadata=="cbclintprobtot_m24_std" & qval < .25 & N.not.zero >= .15*140) %>%
dplyr::mutate(analysis = "internalizing symptoms at age 2 years")
maaslin_dev2_results <- maaslin_dev2_0.5p$results %>%
dplyr::filter(metadata=="cbcldevprobtot_m24_std" & qval < .25 & N.not.zero >= .15*166) %>%
dplyr::mutate(analysis = "developmental problems at age 2 years")
maaslin_tot2_results <- maaslin_tot_0.5p$results %>%
dplyr::filter(metadata=="cbcltotprobtot_m24_std" & qval < .25 & N.not.zero >= .15*150) %>%
dplyr::mutate(analysis = "total problems at age 2 years")
maaslin_tot4_results <- maaslin_tot4_0.5p$results %>%
dplyr::filter(metadata=="cbcltotprobtot_y4_std" & qval < .25 & N.not.zero >= .15*289) %>%
dplyr::mutate(analysis = "total problems at age 4 years")
maaslin_att4_results <- maaslin_att4_0.5p$results %>%
dplyr::filter(metadata=="cbclattprobtot_y4_std" & qval < .25 & N.not.zero >= .15*202) %>%
dplyr::mutate(analysis = "attention problems at age 4 years")
maaslin_ext4_results <- maaslin_ext4_0.5p$results %>%
dplyr::filter(metadata=="cbclextprobtot_y4_std" & qval < .25 & N.not.zero >= .15*202) %>%
dplyr::mutate(analysis = "externalizing problems at age 4 years")
maaslin_int4_results <- maaslin_int4_0.5p$results %>%
dplyr::filter(metadata=="cbclintprobtot_y4_std" & qval < .25 & N.not.zero >= .15*282) %>%
dplyr::mutate(analysis = "internalizing problems at age 4 years")
maaslin_sleep4_results <- maaslin_sleep4_0.5p$results %>%
dplyr::filter(metadata=="cbclsleepprobtot_y4_std" & qval < .25 & N.not.zero >= .15*276) %>%
dplyr::mutate(analysis = "sleep problems at age 4 years")
maaslin_dev4_results <- maaslin_dev4_0.5p$results %>%
dplyr::filter(metadata=="cbcldevprobtot_y4_std" & qval < .25 & N.not.zero >= .15*289) %>%
dplyr::mutate(analysis = "dev problems at age 4 years")
maaslin_adhd4_results <- maaslin_adhd4_0.5p$results %>%
dplyr::filter(metadata=="cbcladhdtot_y4_std" & qval < .25 & N.not.zero >= .15*202) %>%
dplyr::mutate(analysis = "adhd problems at age 4 years")
maaslin_.5_results <-
adversity_results %>%
bind_rows(maaslin_tot2_results) %>%
bind_rows(maaslin_dev2_results) %>%
bind_rows(maaslin_int2_results) %>%
bind_rows(maaslin_tot4_results) %>%
bind_rows(maaslin_ext4_results) %>%
bind_rows(maaslin_int4_results) %>%
bind_rows(maaslin_sleep4_results) %>%
bind_rows(maaslin_dev4_results) %>%
bind_rows(maaslin_att4_results) %>%
bind_rows(maaslin_adhd4_results)
#Create new variables for graphing
#create new var: log2(exp(coeff))
maaslin_.5_results <-
maaslin_.5_results %>%
dplyr::rename(taxa = feature) %>%
dplyr::mutate(
log2FoldChange = sign(coef)*log(2*(exp(abs(coef)))),
se_min = coef-stderr,
se_max = coef+stderr,
semin_log2fc = sign(se_min)*log(2*(exp(se_min))),
semax_log2fc = sign(se_max)*log(2*(exp(se_max))),
error_margin = qt(0.975, N.not.zero - 2, lower.tail = TRUE) * stderr, # manually calculate 95% confidence interval for a regression coefficient (t-value for alpha/2, df-2 x SE)
lower_CI = coef - error_margin,
upper_CI = coef + error_margin,
lCI_log2fc = sign(lower_CI)*log(2*(exp(abs(lower_CI)))),
uCI_log2fc = sign(upper_CI)*log(2*(exp(abs(upper_CI)))),
fact_analysis = factor(analysis)
)
#create new var with shorter taxa names
maaslin_.5_results <-
maaslin_.5_results %>%
dplyr::mutate(
Taxa = case_when(str_detect(taxa, "Intestinibacter") ~ "g__Intestinibacter",
str_detect(taxa, "sensu.stricto") ~ "g__Clostridium sensu stricto",
str_detect(taxa, "Lachnoclostridium.uncul") ~ "g__Lachnoclostridium",
str_detect(taxa, "Streptococcus") ~ "g__Streptococcus",
str_detect(taxa, "Parabacteroides") ~ "g__Parabacteroides",
str_detect(taxa, "Finegoldia") ~ "g__Finegoldia",
str_detect(taxa, "Ruminococcus") ~ "g__Ruminococcus",
str_detect(taxa, "Blautia") ~ "g__Blautia",
str_detect(taxa, "Peptoclostridium") ~ "g__Peptoclostridium",
str_detect(taxa, "Veillonella") ~ "g__Veillonella",
str_detect(taxa, "UCG") ~ "f__Lachnospiraceae",
str_detect(taxa, "Faecalibacterium") ~ "g__Faecalibacterium",
str_detect(taxa, "Coprobacillus") ~ "g__Coprobacillus"
)
)
#create more specific analysis variable that specifies the significant predictor in the models with all adversities together
maaslin_.5_results <- maaslin_.5_results %>%
dplyr::mutate(
analysis = case_when(
analysis=="all_adv" & name=="STAI_prorated_state_pw26" ~ "prenatal (all adversities in model)",
analysis=="all_adv" & name=="postnatal_stress_syes" ~ "postnatal (all adversities in model)",
analysis != "all_adv" ~ analysis
)
)
write_csv(maaslin_.5_results, "../../data/analysis_datasets/fran/maaslin2/final_results/all_maaslin_results_0.5p.csv")
maaslin_.5_results <- read_csv("../../data/analysis_datasets/fran/maaslin2/final_results/all_maaslin_results_0.5p.csv")
## Rows: 15 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): taxa, metadata, value, name, analysis, fact_analysis, Taxa
## dbl (16): coef, stderr, pval, qval, N, N.not.zero, log2FoldChange, se_min, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#create dotplot for adversity variables
ggplot(data=maaslin_.5_results %>% filter(str_detect(analysis, "preconception|prenatal|postnatal")), aes(x = Taxa, y = log2FoldChange, color=factor(analysis))) +
geom_point(aes(color = factor(analysis)), size = 2, position=position_dodge(width=0.3)) +
geom_errorbar(aes(ymin=lCI_log2fc, ymax=uCI_log2fc), width=0, position=position_dodge(width=0.3)) +
scale_x_discrete(expand=c(0.2, 0)) +
coord_flip() +
ylim(-1.5, 1.5) + #needs to be ylim bc coordinates have been flipped
scale_y_continuous(breaks=seq(-1.5, 1.5, by = 0.50)) +
geom_hline(aes(yintercept = 0)) +
scale_color_discrete("Adversity Exposure") +
theme(axis.text = element_text(size = 10)) +
theme_minimal()
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
#ggsave("../../manuscripts/fran_adversity_microbiome/figures/adversity_da_figure.png", height = 4, width = 8, bg = "white")